mirror of
https://github.com/perstarkse/minne.git
synced 2026-07-02 11:01:38 +02:00
feat: configure FastEmbed model in config and admin, with restart to apply
Expose fastembed_model in config and a model dropdown on Admin → Models. Persist dimension from the chosen model, require restart to load it, and align legacy OpenAI default settings so fresh local-embedding installs start cleanly.
This commit is contained in:
+3
-1
@@ -1,6 +1,8 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
## Unreleased
|
## Unreleased
|
||||||
- Search results are now selectable by which type, knowledge entity or ingested content
|
- Search results are now selectable by which type, knowledge entity or ingested content
|
||||||
|
- It is now possible to choose the local embedding model via the admin UI
|
||||||
|
- Admin embedding changes are saved immediately; a restart is needed to re-embed. This simplifies the flow and may be improved in the future
|
||||||
|
|
||||||
## 1.0.2 (2026-02-15)
|
## 1.0.2 (2026-02-15)
|
||||||
- Fix: edge case where navigation back to a chat page could trigger a new response generation
|
- Fix: edge case where navigation back to a chat page could trigger a new response generation
|
||||||
|
|||||||
@@ -0,0 +1,8 @@
|
|||||||
|
-- The migration default for `embedding_model` is the OpenAI model name, which FastEmbed's
-- `from_str` rejects. Where FastEmbed is the recorded backend and that default is still
-- stored, switch the persisted model and dimension to FastEmbed-compatible values.

UPDATE system_settings:current
SET embedding_model = 'Xenova/bge-small-en-v1.5', embedding_dimensions = 384
WHERE embedding_backend = 'fastembed' AND embedding_model = 'text-embedding-3-small';
|
||||||
@@ -117,6 +117,10 @@ pub struct AppConfig {
|
|||||||
pub fastembed_show_download_progress: Option<bool>,
|
pub fastembed_show_download_progress: Option<bool>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub fastembed_max_length: Option<usize>,
|
pub fastembed_max_length: Option<usize>,
|
||||||
|
/// HuggingFace-style FastEmbed `model_code` (e.g. `Xenova/bge-small-en-v1.5`). Overrides
|
||||||
|
/// `system_settings.embedding_model` when `embedding_backend` is `fastembed`.
|
||||||
|
#[serde(default)]
|
||||||
|
pub fastembed_model: Option<String>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub embedding_backend: EmbeddingBackend,
|
pub embedding_backend: EmbeddingBackend,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
@@ -226,6 +230,7 @@ impl Default for AppConfig {
|
|||||||
fastembed_cache_dir: None,
|
fastembed_cache_dir: None,
|
||||||
fastembed_show_download_progress: None,
|
fastembed_show_download_progress: None,
|
||||||
fastembed_max_length: None,
|
fastembed_max_length: None,
|
||||||
|
fastembed_model: None,
|
||||||
embedding_backend: EmbeddingBackend::default(),
|
embedding_backend: EmbeddingBackend::default(),
|
||||||
embedding_pool_size: None,
|
embedding_pool_size: None,
|
||||||
ingest_max_body_bytes: default_ingest_max_body_bytes(),
|
ingest_max_body_bytes: default_ingest_max_body_bytes(),
|
||||||
|
|||||||
@@ -6,13 +6,16 @@ use std::{
|
|||||||
thread::available_parallelism,
|
thread::available_parallelism,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use serde::Serialize;
|
||||||
|
use tracing::warn;
|
||||||
|
|
||||||
use async_openai::{types::CreateEmbeddingRequestArgs, Client};
|
use async_openai::{types::CreateEmbeddingRequestArgs, Client};
|
||||||
use fastembed::{EmbeddingModel, ModelTrait, TextEmbedding, TextInitOptions};
|
use fastembed::{EmbeddingModel, ModelTrait, TextEmbedding, TextInitOptions};
|
||||||
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
|
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
error::EmbeddingError,
|
error::{AppError, EmbeddingError},
|
||||||
storage::types::system_settings::SystemSettings,
|
storage::{db::SurrealDbClient, types::system_settings::SystemSettings},
|
||||||
utils::config::AppConfig,
|
utils::config::AppConfig,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -59,6 +62,152 @@ enum EmbeddingInner {
|
|||||||
/// progress logging while still amortising per-call lock/dispatch overhead.
|
/// progress logging while still amortising per-call lock/dispatch overhead.
|
||||||
pub const RE_EMBED_BATCH_SIZE: usize = 128;
|
pub const RE_EMBED_BATCH_SIZE: usize = 128;
|
||||||
|
|
||||||
|
/// Default FastEmbed model (`BGESmallENV15`) when config and DB do not specify a valid code.
|
||||||
|
pub const DEFAULT_FASTEMBED_MODEL_CODE: &str = "Xenova/bge-small-en-v1.5";
|
||||||
|
|
||||||
|
/// A supported FastEmbed model for admin UI and documentation.
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
pub struct FastEmbedModelOption {
|
||||||
|
/// HuggingFace-style `model_code` accepted by [`EmbeddingModel::from_str`].
|
||||||
|
pub model_code: String,
|
||||||
|
/// Fixed output dimension for this model.
|
||||||
|
pub dimension: u32,
|
||||||
|
/// Short human-readable description from fastembed metadata.
|
||||||
|
pub description: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lists supported FastEmbed text embedding models (sorted by `model_code`).
|
||||||
|
#[must_use]
|
||||||
|
pub fn list_fastembed_embedding_models() -> Vec<FastEmbedModelOption> {
|
||||||
|
let mut list: Vec<FastEmbedModelOption> = TextEmbedding::list_supported_models()
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|info| {
|
||||||
|
let dimension = u32::try_from(info.dim).ok()?;
|
||||||
|
Some(FastEmbedModelOption {
|
||||||
|
model_code: info.model_code,
|
||||||
|
dimension,
|
||||||
|
description: info.description,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
list.sort_by(|left, right| left.model_code.cmp(&right.model_code));
|
||||||
|
list
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true when `code` is a supported FastEmbed `model_code` (HuggingFace-style id).
|
||||||
|
#[must_use]
|
||||||
|
pub fn is_valid_fastembed_model_code(code: &str) -> bool {
|
||||||
|
!code.trim().is_empty() && EmbeddingModel::from_str(code.trim()).is_ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Vector dimension for a supported FastEmbed `model_code`.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns [`EmbeddingError::UnknownModel`] when the code is not recognized.
|
||||||
|
pub fn fastembed_model_dimension(code: &str) -> Result<u32, EmbeddingError> {
|
||||||
|
let model = EmbeddingModel::from_str(code.trim())
|
||||||
|
.map_err(|_| EmbeddingError::UnknownModel(unknown_fastembed_model_message(code)))?;
|
||||||
|
let dim = EmbeddingModel::get_model_info(&model)
|
||||||
|
.ok_or_else(|| {
|
||||||
|
EmbeddingError::Config(format!("fastembed model metadata missing for {code}"))
|
||||||
|
})?
|
||||||
|
.dim;
|
||||||
|
u32::try_from(dim).map_err(|_| {
|
||||||
|
EmbeddingError::Config(format!("fastembed model dimension {dim} exceeds u32::MAX"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolves the FastEmbed model code to load: config override, then DB, then default.
|
||||||
|
///
|
||||||
|
/// When `config.fastembed_model` is set it must be valid. When only the DB value is used and it
|
||||||
|
/// is not a FastEmbed code (e.g. legacy `text-embedding-3-small`), returns the default model.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns [`EmbeddingError::UnknownModel`] if `config.fastembed_model` is set but invalid.
|
||||||
|
pub fn resolve_fastembed_model_code(
|
||||||
|
config: &AppConfig,
|
||||||
|
settings_model: &str,
|
||||||
|
) -> Result<String, EmbeddingError> {
|
||||||
|
if let Some(code) = config.fastembed_model.as_deref() {
|
||||||
|
let trimmed = code.trim();
|
||||||
|
if trimmed.is_empty() {
|
||||||
|
return Err(EmbeddingError::Config(
|
||||||
|
"fastembed_model must not be empty when set".into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
EmbeddingModel::from_str(trimmed)
|
||||||
|
.map_err(|_| EmbeddingError::UnknownModel(unknown_fastembed_model_message(trimmed)))?;
|
||||||
|
return Ok(trimmed.to_owned());
|
||||||
|
}
|
||||||
|
|
||||||
|
let trimmed = settings_model.trim();
|
||||||
|
if is_valid_fastembed_model_code(trimmed) {
|
||||||
|
return Ok(trimmed.to_owned());
|
||||||
|
}
|
||||||
|
|
||||||
|
if !trimmed.is_empty() {
|
||||||
|
warn!(
|
||||||
|
stored_model = trimmed,
|
||||||
|
default_model = DEFAULT_FASTEMBED_MODEL_CODE,
|
||||||
|
"system_settings.embedding_model is not a FastEmbed model code; using default"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(DEFAULT_FASTEMBED_MODEL_CODE.to_owned())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Persists a FastEmbed-compatible `embedding_model` and `embedding_dimensions` before startup
|
||||||
|
/// when the active backend is FastEmbed and stored settings still carry OpenAI defaults.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns [`AppError`] if settings cannot be loaded, resolved, or updated.
|
||||||
|
pub async fn align_fastembed_system_settings(
|
||||||
|
db: &SurrealDbClient,
|
||||||
|
config: &AppConfig,
|
||||||
|
) -> Result<SystemSettings, AppError> {
|
||||||
|
if config.embedding_backend != EmbeddingBackend::FastEmbed {
|
||||||
|
return SystemSettings::get_current(db).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut settings = SystemSettings::get_current(db).await?;
|
||||||
|
let resolved = resolve_fastembed_model_code(config, &settings.embedding_model)?;
|
||||||
|
let dimension = fastembed_model_dimension(&resolved)?;
|
||||||
|
|
||||||
|
if settings.embedding_model == resolved && settings.embedding_dimensions == dimension {
|
||||||
|
return Ok(settings);
|
||||||
|
}
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
old_model = %settings.embedding_model,
|
||||||
|
new_model = %resolved,
|
||||||
|
old_dimensions = settings.embedding_dimensions,
|
||||||
|
new_dimensions = dimension,
|
||||||
|
"Aligning system settings with FastEmbed model"
|
||||||
|
);
|
||||||
|
settings.embedding_model = resolved;
|
||||||
|
settings.embedding_dimensions = dimension;
|
||||||
|
SystemSettings::update(db, settings).await
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unknown_fastembed_model_message(code: &str) -> String {
|
||||||
|
let mut codes: Vec<String> = TextEmbedding::list_supported_models()
|
||||||
|
.into_iter()
|
||||||
|
.map(|info| info.model_code)
|
||||||
|
.collect();
|
||||||
|
codes.sort();
|
||||||
|
let examples: Vec<&str> = codes.iter().take(6).map(String::as_str).collect();
|
||||||
|
format!(
|
||||||
|
"unknown FastEmbed model '{code}' (expected a HuggingFace model_code such as {}). \
|
||||||
|
Set fastembed_model in config.yaml or update system_settings; \
|
||||||
|
see docs/configuration.md ({count} models supported)",
|
||||||
|
examples.join(", "),
|
||||||
|
count = codes.len()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
/// Default FastEmbed pool size.
|
/// Default FastEmbed pool size.
|
||||||
///
|
///
|
||||||
/// Kept small on purpose: the ONNX runtime already uses intra-op threads per inference, so
|
/// Kept small on purpose: the ONNX runtime already uses intra-op threads per inference, so
|
||||||
@@ -294,7 +443,8 @@ impl EmbeddingProvider {
|
|||||||
) -> Result<Self, EmbeddingError> {
|
) -> Result<Self, EmbeddingError> {
|
||||||
let pool_size = pool_size.max(1);
|
let pool_size = pool_size.max(1);
|
||||||
let model_name = if let Some(code) = model_override {
|
let model_name = if let Some(code) = model_override {
|
||||||
EmbeddingModel::from_str(&code).map_err(EmbeddingError::UnknownModel)?
|
EmbeddingModel::from_str(code.trim())
|
||||||
|
.map_err(|_| EmbeddingError::UnknownModel(unknown_fastembed_model_message(&code)))?
|
||||||
} else {
|
} else {
|
||||||
EmbeddingModel::default()
|
EmbeddingModel::default()
|
||||||
};
|
};
|
||||||
@@ -349,9 +499,10 @@ impl EmbeddingProvider {
|
|||||||
|
|
||||||
/// Creates an embedding provider from persisted settings and bootstrap config.
|
/// Creates an embedding provider from persisted settings and bootstrap config.
|
||||||
///
|
///
|
||||||
/// Model name and dimensions come from [`SystemSettings`]. The active backend is taken
|
/// OpenAI/hashed model settings come from [`SystemSettings`]. FastEmbed uses
|
||||||
/// from `config.embedding_backend` at startup; [`SystemSettings::sync_from_embedding_provider`]
|
/// [`resolve_fastembed_model_code`] (config `fastembed_model` overrides DB). The active
|
||||||
/// persists the resolved backend to the database.
|
/// backend is taken from `config.embedding_backend`; [`SystemSettings::sync_from_embedding_provider`]
|
||||||
|
/// persists the resolved backend to the database after startup.
|
||||||
///
|
///
|
||||||
/// # Errors
|
/// # Errors
|
||||||
///
|
///
|
||||||
@@ -375,7 +526,8 @@ impl EmbeddingProvider {
|
|||||||
let pool_size = config
|
let pool_size = config
|
||||||
.embedding_pool_size
|
.embedding_pool_size
|
||||||
.unwrap_or_else(default_embedding_pool_size);
|
.unwrap_or_else(default_embedding_pool_size);
|
||||||
Self::new_fastembed(Some(settings.embedding_model.clone()), pool_size).await
|
let model_code = resolve_fastembed_model_code(config, &settings.embedding_model)?;
|
||||||
|
Self::new_fastembed(Some(model_code), pool_size).await
|
||||||
}
|
}
|
||||||
EmbeddingBackend::Hashed => {
|
EmbeddingBackend::Hashed => {
|
||||||
let dimension = usize::try_from(dimensions).map_err(|_| {
|
let dimension = usize::try_from(dimensions).map_err(|_| {
|
||||||
@@ -433,7 +585,11 @@ fn bucket(token: &str, dimension: usize) -> usize {
|
|||||||
mod tests {
|
mod tests {
|
||||||
#![allow(clippy::expect_used)]
|
#![allow(clippy::expect_used)]
|
||||||
|
|
||||||
use super::{EmbeddingBackend, ParseEmbeddingBackendError};
|
use super::{
|
||||||
|
align_fastembed_system_settings, fastembed_model_dimension, list_fastembed_embedding_models,
|
||||||
|
resolve_fastembed_model_code, DEFAULT_FASTEMBED_MODEL_CODE, EmbeddingError,
|
||||||
|
};
|
||||||
|
use crate::utils::config::{AppConfig, EmbeddingBackend, ParseEmbeddingBackendError};
|
||||||
use crate::storage::types::system_settings::SystemSettings;
|
use crate::storage::types::system_settings::SystemSettings;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
@@ -483,6 +639,72 @@ mod tests {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The model catalog must contain the default FastEmbed model code.
#[test]
fn list_fastembed_embedding_models_includes_default() {
    let has_default = list_fastembed_embedding_models()
        .iter()
        .any(|option| option.model_code == DEFAULT_FASTEMBED_MODEL_CODE);
    assert!(
        has_default,
        "catalog should include the default FastEmbed model"
    );
}
|
||||||
|
|
||||||
|
/// A valid config override wins over whatever model name is stored in the database.
#[test]
fn resolve_fastembed_model_prefers_config_over_db() {
    let override_code = "Xenova/bge-base-en-v1.5";
    let config = AppConfig {
        fastembed_model: Some(override_code.into()),
        ..AppConfig::default()
    };

    let resolved =
        resolve_fastembed_model_code(&config, "text-embedding-3-small").expect("config model");

    assert_eq!(resolved, override_code);
}
|
||||||
|
|
||||||
|
/// Without a config override, a legacy OpenAI model name resolves to the FastEmbed default.
#[test]
fn resolve_fastembed_model_falls_back_from_openai_default() {
    let resolved = resolve_fastembed_model_code(&AppConfig::default(), "text-embedding-3-small")
        .expect("default model");

    assert_eq!(resolved, DEFAULT_FASTEMBED_MODEL_CODE);
}
|
||||||
|
|
||||||
|
/// An unknown config override is an error even when the stored model is valid.
#[test]
fn resolve_fastembed_model_rejects_invalid_config_override() {
    let config = AppConfig {
        fastembed_model: Some("not-a-real-model".into()),
        ..AppConfig::default()
    };

    let outcome = resolve_fastembed_model_code(&config, "Xenova/bge-small-en-v1.5");
    let err = outcome.expect_err("invalid config model");

    assert!(matches!(err, EmbeddingError::UnknownModel(_)));
}
|
||||||
|
|
||||||
|
/// The default FastEmbed model reports a 384-dimensional output.
#[test]
fn fastembed_model_dimension_matches_model_metadata() {
    assert_eq!(
        fastembed_model_dimension(DEFAULT_FASTEMBED_MODEL_CODE).expect("dim"),
        384
    );
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn align_fastembed_system_settings_replaces_openai_default() -> anyhow::Result<()> {
|
||||||
|
use crate::storage::db::SurrealDbClient;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
let db = SurrealDbClient::memory("align_fe", &Uuid::new_v4().to_string()).await?;
|
||||||
|
db.apply_migrations().await?;
|
||||||
|
|
||||||
|
let config = AppConfig {
|
||||||
|
embedding_backend: EmbeddingBackend::FastEmbed,
|
||||||
|
..AppConfig::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let settings = align_fastembed_system_settings(&db, &config).await?;
|
||||||
|
assert_eq!(settings.embedding_model, DEFAULT_FASTEMBED_MODEL_CODE);
|
||||||
|
assert_eq!(settings.embedding_dimensions, 384);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn system_settings_deserializes_embedding_backend_field() {
|
fn system_settings_deserializes_embedding_backend_field() {
|
||||||
let value = json!({
|
let value = json!({
|
||||||
|
|||||||
@@ -24,7 +24,8 @@ Minne can be configured via environment variables or a `config.yaml` file. Envir
|
|||||||
| `RUST_LOG` | Logging level | `info` |
|
| `RUST_LOG` | Logging level | `info` |
|
||||||
| `STORAGE` | Storage backend (`local`, `memory`, `s3`) | `local` |
|
| `STORAGE` | Storage backend (`local`, `memory`, `s3`) | `local` |
|
||||||
| `PDF_INGEST_MODE` | PDF ingestion strategy (`classic`, `llm-first`) | `llm-first` |
|
| `PDF_INGEST_MODE` | PDF ingestion strategy (`classic`, `llm-first`) | `llm-first` |
|
||||||
| `EMBEDDING_BACKEND` | Embedding provider (`openai`, `fastembed`) | `fastembed` |
|
| `EMBEDDING_BACKEND` | Embedding provider (`openai`, `fastembed`, `hashed`) | `fastembed` |
|
||||||
|
| `FASTEMBED_MODEL` | FastEmbed HuggingFace `model_code` (overrides DB when set) | `Xenova/bge-small-en-v1.5` |
|
||||||
| `FASTEMBED_CACHE_DIR` | Model cache directory | `<data_dir>/fastembed` |
|
| `FASTEMBED_CACHE_DIR` | Model cache directory | `<data_dir>/fastembed` |
|
||||||
| `FASTEMBED_SHOW_DOWNLOAD_PROGRESS` | Show progress bar for model downloads | `false` |
|
| `FASTEMBED_SHOW_DOWNLOAD_PROGRESS` | Show progress bar for model downloads | `false` |
|
||||||
| `FASTEMBED_MAX_LENGTH` | Max sequence length for FastEmbed models | - |
|
| `FASTEMBED_MAX_LENGTH` | Max sequence length for FastEmbed models | - |
|
||||||
@@ -76,6 +77,8 @@ storage: "local"
|
|||||||
# s3_region: "us-east-1"
|
# s3_region: "us-east-1"
|
||||||
pdf_ingest_mode: "llm-first"
|
pdf_ingest_mode: "llm-first"
|
||||||
embedding_backend: "fastembed"
|
embedding_backend: "fastembed"
|
||||||
|
# HuggingFace model_code (see fastembed docs); dimensions are fixed per model
|
||||||
|
fastembed_model: "Xenova/bge-small-en-v1.5"
|
||||||
|
|
||||||
# Optional reranking
|
# Optional reranking
|
||||||
reranking_enabled: true
|
reranking_enabled: true
|
||||||
|
|||||||
@@ -2027,6 +2027,12 @@
|
|||||||
.border-base-200 {
|
.border-base-200 {
|
||||||
border-color: var(--color-base-200);
|
border-color: var(--color-base-200);
|
||||||
}
|
}
|
||||||
|
.border-base-content\/10 {
|
||||||
|
border-color: var(--color-base-content);
|
||||||
|
@supports (color: color-mix(in lab, red, red)) {
|
||||||
|
border-color: color-mix(in oklab, var(--color-base-content) 10%, transparent);
|
||||||
|
}
|
||||||
|
}
|
||||||
.border-error {
|
.border-error {
|
||||||
border-color: var(--color-error);
|
border-color: var(--color-error);
|
||||||
}
|
}
|
||||||
@@ -2048,6 +2054,12 @@
|
|||||||
.bg-base-200 {
|
.bg-base-200 {
|
||||||
background-color: var(--color-base-200);
|
background-color: var(--color-base-200);
|
||||||
}
|
}
|
||||||
|
.bg-base-200\/40 {
|
||||||
|
background-color: var(--color-base-200);
|
||||||
|
@supports (color: color-mix(in lab, red, red)) {
|
||||||
|
background-color: color-mix(in oklab, var(--color-base-200) 40%, transparent);
|
||||||
|
}
|
||||||
|
}
|
||||||
.bg-transparent {
|
.bg-transparent {
|
||||||
background-color: transparent;
|
background-color: transparent;
|
||||||
}
|
}
|
||||||
@@ -2334,6 +2346,9 @@
|
|||||||
.opacity-80 {
|
.opacity-80 {
|
||||||
opacity: 80%;
|
opacity: 80%;
|
||||||
}
|
}
|
||||||
|
.opacity-90 {
|
||||||
|
opacity: 90%;
|
||||||
|
}
|
||||||
.opacity-100 {
|
.opacity-100 {
|
||||||
opacity: 100%;
|
opacity: 100%;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,7 +15,13 @@ use common::{
|
|||||||
},
|
},
|
||||||
system_settings::{SystemSettings, SystemSettingsPatch},
|
system_settings::{SystemSettings, SystemSettingsPatch},
|
||||||
},
|
},
|
||||||
utils::embedding::EmbeddingBackend,
|
utils::{
|
||||||
|
config::AppConfig,
|
||||||
|
embedding::{
|
||||||
|
fastembed_model_dimension, is_valid_fastembed_model_code, list_fastembed_embedding_models,
|
||||||
|
EmbeddingBackend, FastEmbedModelOption,
|
||||||
|
},
|
||||||
|
},
|
||||||
};
|
};
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
@@ -32,6 +38,9 @@ pub struct AdminPanelData {
|
|||||||
default_query_prompt: String,
|
default_query_prompt: String,
|
||||||
default_image_prompt: String,
|
default_image_prompt: String,
|
||||||
available_models: Option<ListModelResponse>,
|
available_models: Option<ListModelResponse>,
|
||||||
|
fastembed_models: Option<Vec<FastEmbedModelOption>>,
|
||||||
|
fastembed_model_locked_by_config: bool,
|
||||||
|
effective_embedding_backend: String,
|
||||||
current_section: AdminSection,
|
current_section: AdminSection,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -70,18 +79,30 @@ pub async fn show_admin_panel(
|
|||||||
(None, None)
|
(None, None)
|
||||||
};
|
};
|
||||||
|
|
||||||
let available_models = if section == AdminSection::Models {
|
let (available_models, fastembed_models, fastembed_model_locked_by_config) =
|
||||||
Some(
|
if section == AdminSection::Models {
|
||||||
state
|
let available_models = Some(
|
||||||
.openai_client
|
state
|
||||||
.models()
|
.openai_client
|
||||||
.list()
|
.models()
|
||||||
.await
|
.list()
|
||||||
.map_err(|e| AppError::InternalError(e.to_string()))?,
|
.await
|
||||||
)
|
.map_err(|e| AppError::InternalError(e.to_string()))?,
|
||||||
} else {
|
);
|
||||||
None
|
let fastembed_models = is_fastembed_admin_context(&settings, &state.config)
|
||||||
};
|
.then(list_fastembed_embedding_models);
|
||||||
|
let fastembed_model_locked_by_config = state.config.fastembed_model.is_some();
|
||||||
|
(
|
||||||
|
available_models,
|
||||||
|
fastembed_models,
|
||||||
|
fastembed_model_locked_by_config,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
(None, None, false)
|
||||||
|
};
|
||||||
|
|
||||||
|
let effective_backend =
|
||||||
|
effective_embedding_backend(&settings, &state.config).as_str().to_string();
|
||||||
|
|
||||||
Ok(TemplateResponse::new_template(
|
Ok(TemplateResponse::new_template(
|
||||||
"admin/base.html",
|
"admin/base.html",
|
||||||
@@ -89,6 +110,9 @@ pub async fn show_admin_panel(
|
|||||||
settings,
|
settings,
|
||||||
analytics,
|
analytics,
|
||||||
available_models,
|
available_models,
|
||||||
|
fastembed_models,
|
||||||
|
fastembed_model_locked_by_config,
|
||||||
|
effective_embedding_backend: effective_backend,
|
||||||
users,
|
users,
|
||||||
default_query_prompt: DEFAULT_QUERY_SYSTEM_PROMPT.to_string(),
|
default_query_prompt: DEFAULT_QUERY_SYSTEM_PROMPT.to_string(),
|
||||||
default_image_prompt: DEFAULT_IMAGE_PROCESSING_PROMPT.to_string(),
|
default_image_prompt: DEFAULT_IMAGE_PROCESSING_PROMPT.to_string(),
|
||||||
@@ -150,6 +174,100 @@ pub struct ModelSettingsInput {
|
|||||||
pub struct ModelSettingsData {
|
pub struct ModelSettingsData {
|
||||||
settings: SystemSettings,
|
settings: SystemSettings,
|
||||||
available_models: ListModelResponse,
|
available_models: ListModelResponse,
|
||||||
|
fastembed_models: Option<Vec<FastEmbedModelOption>>,
|
||||||
|
fastembed_model_locked_by_config: bool,
|
||||||
|
effective_embedding_backend: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Outcome of validating an admin model-settings submission against the effective embedding
/// backend: the values to persist plus what the operator must do afterwards.
#[derive(Clone, Debug, PartialEq, Eq)]
struct EmbeddingSettingsPlan {
    /// Embedding model name to persist.
    embedding_model: String,
    /// Embedding vector dimension to persist.
    embedding_dimensions: u32,
    /// True when the dimension differs from the currently stored one.
    reembedding_needed: bool,
    /// True when the change needs a restart to take effect.
    restart_needed: bool,
}
|
||||||
|
|
||||||
|
/// Returns the embedding backend the admin UI should treat as active: the one persisted in
/// system settings when present, otherwise the one from the bootstrap config.
fn effective_embedding_backend(settings: &SystemSettings, config: &AppConfig) -> EmbeddingBackend {
    if let Some(persisted) = settings.embedding_backend {
        return persisted;
    }
    config.embedding_backend
}
|
||||||
|
|
||||||
|
fn is_fastembed_admin_context(settings: &SystemSettings, config: &AppConfig) -> bool {
|
||||||
|
effective_embedding_backend(settings, config) == EmbeddingBackend::FastEmbed
|
||||||
|
}
|
||||||
|
|
||||||
|
fn plan_embedding_settings_update(
|
||||||
|
current: &SystemSettings,
|
||||||
|
input: &ModelSettingsInput,
|
||||||
|
config: &AppConfig,
|
||||||
|
) -> Result<EmbeddingSettingsPlan, AppError> {
|
||||||
|
match effective_embedding_backend(current, config) {
|
||||||
|
EmbeddingBackend::OpenAI => {
|
||||||
|
let reembedding_needed = input
|
||||||
|
.embedding_dimensions
|
||||||
|
.is_some_and(|new_dims| new_dims != current.embedding_dimensions);
|
||||||
|
let embedding_model = input
|
||||||
|
.embedding_model
|
||||||
|
.clone()
|
||||||
|
.unwrap_or_else(|| current.embedding_model.clone());
|
||||||
|
let embedding_dimensions = input
|
||||||
|
.embedding_dimensions
|
||||||
|
.unwrap_or(current.embedding_dimensions);
|
||||||
|
Ok(EmbeddingSettingsPlan {
|
||||||
|
embedding_model,
|
||||||
|
embedding_dimensions,
|
||||||
|
reembedding_needed,
|
||||||
|
restart_needed: reembedding_needed,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
EmbeddingBackend::FastEmbed => {
|
||||||
|
if config.fastembed_model.is_some() {
|
||||||
|
return Ok(EmbeddingSettingsPlan {
|
||||||
|
embedding_model: current.embedding_model.clone(),
|
||||||
|
embedding_dimensions: current.embedding_dimensions,
|
||||||
|
reembedding_needed: false,
|
||||||
|
restart_needed: false,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let embedding_model = input
|
||||||
|
.embedding_model
|
||||||
|
.as_deref()
|
||||||
|
.map(str::trim)
|
||||||
|
.filter(|value| !value.is_empty())
|
||||||
|
.map(ToOwned::to_owned)
|
||||||
|
.unwrap_or_else(|| current.embedding_model.clone());
|
||||||
|
|
||||||
|
if !is_valid_fastembed_model_code(&embedding_model) {
|
||||||
|
return Err(AppError::Validation(format!(
|
||||||
|
"Unknown FastEmbed model '{embedding_model}'. Choose a model from the list."
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let embedding_dimensions = fastembed_model_dimension(&embedding_model)
|
||||||
|
.map_err(AppError::from)?;
|
||||||
|
let reembedding_needed = embedding_dimensions != current.embedding_dimensions;
|
||||||
|
let restart_needed =
|
||||||
|
embedding_model != current.embedding_model || reembedding_needed;
|
||||||
|
|
||||||
|
Ok(EmbeddingSettingsPlan {
|
||||||
|
embedding_model,
|
||||||
|
embedding_dimensions,
|
||||||
|
reembedding_needed,
|
||||||
|
restart_needed,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
EmbeddingBackend::Hashed => {
|
||||||
|
info!(
|
||||||
|
backend = ?current.embedding_backend,
|
||||||
|
"Embedding model/dimensions for hashed backend are controlled by config"
|
||||||
|
);
|
||||||
|
Ok(EmbeddingSettingsPlan {
|
||||||
|
embedding_model: current.embedding_model.clone(),
|
||||||
|
embedding_dimensions: current.embedding_dimensions,
|
||||||
|
reembedding_needed: false,
|
||||||
|
restart_needed: false,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn update_model_settings(
|
pub async fn update_model_settings(
|
||||||
@@ -157,63 +275,32 @@ pub async fn update_model_settings(
|
|||||||
Form(input): Form<ModelSettingsInput>,
|
Form(input): Form<ModelSettingsInput>,
|
||||||
) -> TemplateResult {
|
) -> TemplateResult {
|
||||||
let current_settings = SystemSettings::get_current(&state.db).await?;
|
let current_settings = SystemSettings::get_current(&state.db).await?;
|
||||||
|
let embedding_plan =
|
||||||
// Check if using FastEmbed - if so, embedding model/dimensions cannot be changed via UI
|
plan_embedding_settings_update(&current_settings, &input, &state.config)?;
|
||||||
let uses_local_embeddings = current_settings.embedding_backend.is_some_and(
|
|
||||||
|backend| matches!(backend, EmbeddingBackend::FastEmbed | EmbeddingBackend::Hashed),
|
|
||||||
);
|
|
||||||
|
|
||||||
// For local embeddings, ignore any embedding model/dimension changes from the form
|
|
||||||
let (final_embedding_model, final_embedding_dimensions, reembedding_needed) =
|
|
||||||
if uses_local_embeddings {
|
|
||||||
// Keep current values - they're controlled by config, not the admin UI
|
|
||||||
info!(
|
|
||||||
backend = ?current_settings.embedding_backend,
|
|
||||||
"Embedding model/dimensions controlled by config, ignoring form input"
|
|
||||||
);
|
|
||||||
(
|
|
||||||
current_settings.embedding_model.clone(),
|
|
||||||
current_settings.embedding_dimensions,
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
// OpenAI backend - allow changes from form
|
|
||||||
let reembedding_needed = input
|
|
||||||
.embedding_dimensions
|
|
||||||
.is_some_and(|new_dims| new_dims != current_settings.embedding_dimensions);
|
|
||||||
(
|
|
||||||
input
|
|
||||||
.embedding_model
|
|
||||||
.unwrap_or_else(|| current_settings.embedding_model.clone()),
|
|
||||||
input
|
|
||||||
.embedding_dimensions
|
|
||||||
.unwrap_or(current_settings.embedding_dimensions),
|
|
||||||
reembedding_needed,
|
|
||||||
)
|
|
||||||
};
|
|
||||||
|
|
||||||
let new_settings = SystemSettingsPatch {
|
let new_settings = SystemSettingsPatch {
|
||||||
query_model: Some(input.query_model),
|
query_model: Some(input.query_model),
|
||||||
processing_model: Some(input.processing_model),
|
processing_model: Some(input.processing_model),
|
||||||
image_processing_model: Some(input.image_processing_model),
|
image_processing_model: Some(input.image_processing_model),
|
||||||
voice_processing_model: Some(input.voice_processing_model),
|
voice_processing_model: Some(input.voice_processing_model),
|
||||||
embedding_model: Some(final_embedding_model),
|
embedding_model: Some(embedding_plan.embedding_model),
|
||||||
embedding_dimensions: Some(final_embedding_dimensions),
|
embedding_dimensions: Some(embedding_plan.embedding_dimensions),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}
|
||||||
.apply(&state.db)
|
.apply(&state.db)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
if reembedding_needed {
|
if embedding_plan.reembedding_needed {
|
||||||
// Re-embedding is owned by startup (the worker/combined binary), not the admin request.
|
// Re-embedding is owned by startup (the worker/combined binary), not the admin request.
|
||||||
// Doing it inline here would leave the live, startup-built embedding provider embedding
|
|
||||||
// queries at the old dimension while stored vectors move to the new one — broken retrieval
|
|
||||||
// until restart. Persisting the new settings is enough: on the next restart the maintainer
|
|
||||||
// detects the index/dimension mismatch and re-embeds before rebuilding indexes.
|
|
||||||
info!(
|
info!(
|
||||||
new_dimensions = new_settings.embedding_dimensions,
|
new_dimensions = new_settings.embedding_dimensions,
|
||||||
"Embedding dimensions changed; restart the worker/server to re-embed and apply"
|
"Embedding dimensions changed; restart the worker/server to re-embed and apply"
|
||||||
);
|
);
|
||||||
|
} else if embedding_plan.restart_needed {
|
||||||
|
info!(
|
||||||
|
new_model = %new_settings.embedding_model,
|
||||||
|
"Embedding model changed; restart the worker/server to apply"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let available_models = state
|
let available_models = state
|
||||||
@@ -223,16 +310,98 @@ pub async fn update_model_settings(
|
|||||||
.await
|
.await
|
||||||
.map_err(|_e| AppError::InternalError("Failed to get models".to_string()))?;
|
.map_err(|_e| AppError::InternalError("Failed to get models".to_string()))?;
|
||||||
|
|
||||||
|
let effective_backend =
|
||||||
|
effective_embedding_backend(&new_settings, &state.config).as_str().to_string();
|
||||||
|
let show_fastembed_models =
|
||||||
|
is_fastembed_admin_context(&new_settings, &state.config).then(list_fastembed_embedding_models);
|
||||||
|
|
||||||
Ok(TemplateResponse::new_partial(
|
Ok(TemplateResponse::new_partial(
|
||||||
"admin/sections/models.html",
|
"admin/sections/models.html",
|
||||||
"model_settings_form",
|
"model_settings_form",
|
||||||
ModelSettingsData {
|
ModelSettingsData {
|
||||||
settings: new_settings,
|
settings: new_settings,
|
||||||
available_models,
|
available_models,
|
||||||
|
fastembed_models: show_fastembed_models,
|
||||||
|
fastembed_model_locked_by_config: state.config.fastembed_model.is_some(),
|
||||||
|
effective_embedding_backend: effective_backend,
|
||||||
},
|
},
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    // `expect` is acceptable here: a failed fixture should abort the test with a clear message.
    #![allow(clippy::expect_used)]

    use super::*;
    use common::utils::config::AppConfig;

    /// Baseline fixture: settings recorded for the OpenAI backend
    /// (`text-embedding-3-small`, 1536 dimensions). Tests override individual
    /// fields with struct-update syntax.
    fn openai_settings() -> SystemSettings {
        SystemSettings {
            id: "current".into(),
            registrations_enabled: true,
            require_email_verification: false,
            query_model: "gpt-4o-mini".into(),
            processing_model: "gpt-4o-mini".into(),
            embedding_model: "text-embedding-3-small".into(),
            embedding_dimensions: 1536,
            embedding_backend: Some(EmbeddingBackend::OpenAI),
            query_system_prompt: "q".into(),
            ingestion_system_prompt: "i".into(),
            image_processing_model: "gpt-4o-mini".into(),
            image_processing_prompt: "p".into(),
            voice_processing_model: "whisper-1".into(),
        }
    }

    /// Submitting a different FastEmbed model without explicit dimensions is
    /// expected to yield a plan carrying the new model, 768 dimensions, and
    /// both the re-embed and restart flags.
    #[test]
    fn plan_fastembed_update_sets_dimensions_from_model_metadata() {
        let current = SystemSettings {
            embedding_backend: Some(EmbeddingBackend::FastEmbed),
            embedding_model: "Xenova/bge-small-en-v1.5".into(),
            embedding_dimensions: 384,
            ..openai_settings()
        };
        let input = ModelSettingsInput {
            query_model: current.query_model.clone(),
            processing_model: current.processing_model.clone(),
            image_processing_model: current.image_processing_model.clone(),
            voice_processing_model: current.voice_processing_model.clone(),
            embedding_model: Some("Xenova/bge-base-en-v1.5".into()),
            // Left unset on purpose: the plan must supply the dimension itself.
            embedding_dimensions: None,
        };

        let plan = plan_embedding_settings_update(&current, &input, &AppConfig::default())
            .expect("plan");

        assert_eq!(plan.embedding_model, "Xenova/bge-base-en-v1.5");
        assert_eq!(plan.embedding_dimensions, 768);
        assert!(plan.reembedding_needed);
        assert!(plan.restart_needed);
    }

    /// When `fastembed_model` is set in the config, the model submitted through
    /// the form is expected to be ignored: the plan keeps the stored model and
    /// does not ask for a restart.
    #[test]
    fn plan_fastembed_ignores_form_when_config_overrides_model() {
        let current = SystemSettings {
            embedding_backend: Some(EmbeddingBackend::FastEmbed),
            ..openai_settings()
        };
        let input = ModelSettingsInput {
            query_model: current.query_model.clone(),
            processing_model: current.processing_model.clone(),
            image_processing_model: current.image_processing_model.clone(),
            voice_processing_model: current.voice_processing_model.clone(),
            embedding_model: Some("Xenova/bge-large-en-v1.5".into()),
            embedding_dimensions: None,
        };
        let config = AppConfig {
            embedding_backend: EmbeddingBackend::FastEmbed,
            fastembed_model: Some("Xenova/bge-small-en-v1.5".into()),
            ..AppConfig::default()
        };

        let plan = plan_embedding_settings_update(&current, &input, &config).expect("plan");

        assert_eq!(plan.embedding_model, current.embedding_model);
        assert!(!plan.restart_needed);
    }
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
pub struct SystemPromptEditData {
|
pub struct SystemPromptEditData {
|
||||||
settings: SystemSettings,
|
settings: SystemSettings,
|
||||||
|
|||||||
@@ -4,7 +4,8 @@
|
|||||||
<div class="text-sm uppercase tracking-wide opacity-60 mb-1">AI Models</div>
|
<div class="text-sm uppercase tracking-wide opacity-60 mb-1">AI Models</div>
|
||||||
<h2 class="text-lg font-semibold">Model configuration</h2>
|
<h2 class="text-lg font-semibold">Model configuration</h2>
|
||||||
<p class="text-xs opacity-70 max-w-2xl">
|
<p class="text-xs opacity-70 max-w-2xl">
|
||||||
Choose which models power conversational search, ingestion analysis, and embeddings. Adjusting embeddings may trigger a full reprocess.
|
Choose which models power conversational search, ingestion analysis, and embeddings.
|
||||||
|
Embedding dimension changes apply after you restart the worker or server.
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<a
|
<a
|
||||||
@@ -70,7 +71,30 @@
|
|||||||
<div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
|
<div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
|
||||||
<div>
|
<div>
|
||||||
<div class="text-sm opacity-80 mb-1">Embedding Model</div>
|
<div class="text-sm opacity-80 mb-1">Embedding Model</div>
|
||||||
{% if settings.embedding_backend == "fastembed" or settings.embedding_backend == "hashed" %}
|
{% if effective_embedding_backend == "fastembed" %}
|
||||||
|
{% if fastembed_model_locked_by_config %}
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
class="nb-input w-full opacity-60 cursor-not-allowed"
|
||||||
|
value="{{ settings.embedding_model }}"
|
||||||
|
disabled
|
||||||
|
/>
|
||||||
|
<p class="text-xs text-info mt-1">
|
||||||
|
Overridden by <span class="font-mono">fastembed_model</span> in config.yaml at startup. Remove that setting to manage the model here.
|
||||||
|
</p>
|
||||||
|
{% else %}
|
||||||
|
<select name="embedding_model" id="fastembed_model_select" class="nb-select w-full">
|
||||||
|
{% for fe in fastembed_models %}
|
||||||
|
<option value="{{ fe.model_code }}" {% if settings.embedding_model == fe.model_code %}selected{% endif %}>
|
||||||
|
{{ fe.model_code }} ({{ fe.dimension }} dims)
|
||||||
|
</option>
|
||||||
|
{% endfor %}
|
||||||
|
</select>
|
||||||
|
<p class="text-xs opacity-70 mt-1">
|
||||||
|
Save, then restart the worker or server to load the new model. First run may download weights.
|
||||||
|
</p>
|
||||||
|
{% endif %}
|
||||||
|
{% elif effective_embedding_backend == "hashed" %}
|
||||||
<input
|
<input
|
||||||
type="text"
|
type="text"
|
||||||
name="embedding_model"
|
name="embedding_model"
|
||||||
@@ -78,11 +102,8 @@
|
|||||||
value="{{ settings.embedding_model }}"
|
value="{{ settings.embedding_model }}"
|
||||||
disabled
|
disabled
|
||||||
/>
|
/>
|
||||||
<p class="text-xs opacity-70 mt-1">
|
|
||||||
Model: <span class="font-mono">{{ settings.embedding_model }} ({{ settings.embedding_dimensions }} dims)</span>
|
|
||||||
</p>
|
|
||||||
<p class="text-xs text-info mt-1">
|
<p class="text-xs text-info mt-1">
|
||||||
Info: Embedding model is controlled by config when using <span class="font-mono">{{ settings.embedding_backend }}</span> backend.
|
Hashed embeddings use <span class="font-mono">embedding_dimensions</span> from config, not the admin UI.
|
||||||
</p>
|
</p>
|
||||||
{% else %}
|
{% else %}
|
||||||
<select name="embedding_model" class="nb-select w-full">
|
<select name="embedding_model" class="nb-select w-full">
|
||||||
@@ -96,7 +117,18 @@
|
|||||||
|
|
||||||
<div>
|
<div>
|
||||||
<div class="text-sm opacity-80 mb-1" for="embedding_dimensions">Embedding Dimensions</div>
|
<div class="text-sm opacity-80 mb-1" for="embedding_dimensions">Embedding Dimensions</div>
|
||||||
{% if settings.embedding_backend == "fastembed" or settings.embedding_backend == "hashed" %}
|
{% if effective_embedding_backend == "fastembed" %}
|
||||||
|
<input
|
||||||
|
type="number"
|
||||||
|
id="embedding_dimensions"
|
||||||
|
class="nb-input w-full opacity-60 cursor-not-allowed"
|
||||||
|
value="{{ settings.embedding_dimensions }}"
|
||||||
|
disabled
|
||||||
|
/>
|
||||||
|
<p class="text-xs opacity-70 mt-1">
|
||||||
|
Fixed by the selected FastEmbed model. A dimension change triggers a full re-embed after restart.
|
||||||
|
</p>
|
||||||
|
{% elif effective_embedding_backend == "hashed" %}
|
||||||
<input
|
<input
|
||||||
type="number"
|
type="number"
|
||||||
id="embedding_dimensions"
|
id="embedding_dimensions"
|
||||||
@@ -106,8 +138,7 @@
|
|||||||
disabled
|
disabled
|
||||||
/>
|
/>
|
||||||
<p class="text-xs text-info mt-1">
|
<p class="text-xs text-info mt-1">
|
||||||
Info: Dimensions are fixed for <span class="font-mono">{{ settings.embedding_backend }}</span> backend. Set
|
Set <span class="font-mono">EMBEDDING_BACKEND=openai</span> for OpenAI embeddings, or configure hashed dims in config.
|
||||||
<span class="font-mono">EMBEDDING_BACKEND=openai</span> to use OpenAI embeddings.
|
|
||||||
</p>
|
</p>
|
||||||
{% else %}
|
{% else %}
|
||||||
<input
|
<input
|
||||||
@@ -119,15 +150,50 @@
|
|||||||
required
|
required
|
||||||
min="1"
|
min="1"
|
||||||
/>
|
/>
|
||||||
<p class="text-xs opacity-70 mt-1">Changing dimensions will trigger a background re-embedding.</p>
|
<p class="text-xs opacity-70 mt-1">
|
||||||
|
Saving a new dimension updates settings only. Restart the worker (or combined app) to re-embed stored data and rebuild indexes.
|
||||||
|
</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{% if settings.embedding_backend != "fastembed" and settings.embedding_backend != "hashed" %}
|
{% if effective_embedding_backend == "fastembed" and not fastembed_model_locked_by_config %}
|
||||||
|
<div class="nb-panel p-3 bg-base-200/40 border border-base-content/10 text-xs opacity-90 max-w-3xl">
|
||||||
|
<p class="mb-2">
|
||||||
|
<strong>FastEmbed:</strong> The running process keeps the model loaded until restart. Changing to a model with a
|
||||||
|
different dimension re-embeds all stored vectors on the next worker/server startup.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Same-dimension model swaps update settings only; existing vectors are not automatically regenerated until you
|
||||||
|
change dimension (or re-embed via the OpenAI workaround described in ops docs).
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="fastembed-change-alert" class="nb-panel p-3 bg-warning/20 hidden">
|
||||||
|
<div class="text-sm">
|
||||||
|
<strong>Warning:</strong> You changed the FastEmbed model. Save, then restart the worker or server to apply.
|
||||||
|
If the dimension changed, stored embeddings and HNSW indexes will be rebuilt on startup.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if effective_embedding_backend != "fastembed" and effective_embedding_backend != "hashed" %}
|
||||||
|
<div class="nb-panel p-3 bg-base-200/40 border border-base-content/10 text-xs opacity-90 max-w-3xl">
|
||||||
|
<p class="mb-2">
|
||||||
|
<strong>Re-embedding stored data:</strong> Only a change to <span class="font-mono">embedding_dimensions</span>
|
||||||
|
followed by a restart triggers a full re-embed of text chunks and knowledge entities. Changing the embedding model alone
|
||||||
|
does <em>not</em> update vectors already in the database.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
To force a full re-embed (for example after switching models), save a <em>different</em> dimension integer, restart the
|
||||||
|
worker, then set the final dimension and model and restart again if needed.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div id="embedding-change-alert" class="nb-panel p-3 bg-warning/20 hidden">
|
<div id="embedding-change-alert" class="nb-panel p-3 bg-warning/20 hidden">
|
||||||
<div class="text-sm">
|
<div class="text-sm">
|
||||||
<strong>Warning:</strong> Changing dimensions recreates embeddings for text chunks and knowledge entities. Confirm the target model requires the new value.
|
<strong>Warning:</strong> You changed embedding dimensions. Save, then restart the worker or server so stored embeddings
|
||||||
|
and HNSW indexes are rebuilt. Until then, search may use the old dimension.
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
@@ -137,7 +203,26 @@
|
|||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
{% if settings.embedding_backend != "fastembed" and settings.embedding_backend != "hashed" %}
|
{% if effective_embedding_backend == "fastembed" and not fastembed_model_locked_by_config %}
|
||||||
|
<script>
|
||||||
|
(() => {
|
||||||
|
const modelSelect = document.getElementById('fastembed_model_select');
|
||||||
|
const alertElement = document.getElementById('fastembed-change-alert');
|
||||||
|
const initialModel = '{{ settings.embedding_model }}';
|
||||||
|
if (modelSelect && alertElement) {
|
||||||
|
modelSelect.addEventListener('change', (event) => {
|
||||||
|
if (String(event.target.value) !== String(initialModel)) {
|
||||||
|
alertElement.classList.remove('hidden');
|
||||||
|
} else {
|
||||||
|
alertElement.classList.add('hidden');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
})();
|
||||||
|
</script>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if effective_embedding_backend != "fastembed" and effective_embedding_backend != "hashed" %}
|
||||||
<script>
|
<script>
|
||||||
(() => {
|
(() => {
|
||||||
const dimensionInput = document.getElementById('embedding_dimensions');
|
const dimensionInput = document.getElementById('embedding_dimensions');
|
||||||
|
|||||||
@@ -11,11 +11,10 @@ use common::{
|
|||||||
storage::{
|
storage::{
|
||||||
db::SurrealDbClient,
|
db::SurrealDbClient,
|
||||||
store::StorageManager,
|
store::StorageManager,
|
||||||
types::system_settings::SystemSettings,
|
|
||||||
},
|
},
|
||||||
utils::{
|
utils::{
|
||||||
config::{get_config, AppConfig},
|
config::{get_config, AppConfig},
|
||||||
embedding::EmbeddingProvider,
|
embedding::{align_fastembed_system_settings, EmbeddingProvider},
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
use retrieval_pipeline::reranking::RerankerPool;
|
use retrieval_pipeline::reranking::RerankerPool;
|
||||||
@@ -58,9 +57,9 @@ pub(crate) async fn init_with_config(config: AppConfig) -> anyhow::Result<Shared
|
|||||||
.await
|
.await
|
||||||
.context("apply database migrations")?;
|
.context("apply database migrations")?;
|
||||||
|
|
||||||
let settings = SystemSettings::get_current(&db)
|
let settings = align_fastembed_system_settings(&db, &config)
|
||||||
.await
|
.await
|
||||||
.context("load system settings")?;
|
.context("align fastembed system settings")?;
|
||||||
|
|
||||||
let openai_client = Arc::new(Client::with_config(
|
let openai_client = Arc::new(Client::with_config(
|
||||||
async_openai::config::OpenAIConfig::new()
|
async_openai::config::OpenAIConfig::new()
|
||||||
|
|||||||
@@ -213,8 +213,19 @@ async fn release_reembed_lock(db: &SurrealDbClient, owner: &str) {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[allow(clippy::expect_used)]
|
#[allow(clippy::expect_used)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use common::storage::db::SurrealDbClient;
|
use common::{
|
||||||
|
storage::{
|
||||||
|
db::SurrealDbClient,
|
||||||
|
indexes::{embedding_index_dimension, ensure_runtime},
|
||||||
|
types::{system_settings::SystemSettings, text_chunk::TextChunk},
|
||||||
|
},
|
||||||
|
utils::embedding::EmbeddingProvider,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::bootstrap::tests::init_smoke_services;
|
||||||
|
|
||||||
async fn test_db() -> SurrealDbClient {
|
async fn test_db() -> SurrealDbClient {
|
||||||
SurrealDbClient::memory("reembed_lock_ns", &reembed_lock_owner())
|
SurrealDbClient::memory("reembed_lock_ns", &reembed_lock_owner())
|
||||||
@@ -222,6 +233,140 @@ mod tests {
|
|||||||
.expect("in-memory db")
|
.expect("in-memory db")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Index at `stored_dim`, active provider at `target_dim` (no chunks — re-embed only rebuilds indexes).
|
||||||
|
async fn services_with_index_provider_mismatch(
|
||||||
|
stored_dim: usize,
|
||||||
|
target_dim: usize,
|
||||||
|
) -> (super::SharedServices, std::path::PathBuf) {
|
||||||
|
let (mut services, data_dir) = init_smoke_services()
|
||||||
|
.await
|
||||||
|
.expect("smoke services");
|
||||||
|
|
||||||
|
ensure_runtime(&services.db, stored_dim)
|
||||||
|
.await
|
||||||
|
.expect("seed index at stored dimension");
|
||||||
|
|
||||||
|
let mut settings = SystemSettings::get_current(&services.db)
|
||||||
|
.await
|
||||||
|
.expect("settings");
|
||||||
|
settings.embedding_dimensions = u32::try_from(target_dim).expect("target dim fits u32");
|
||||||
|
SystemSettings::update(&services.db, settings)
|
||||||
|
.await
|
||||||
|
.expect("update settings");
|
||||||
|
|
||||||
|
services.embedding_provider = Arc::new(
|
||||||
|
EmbeddingProvider::new_hashed(target_dim).expect("hashed provider for test"),
|
||||||
|
);
|
||||||
|
|
||||||
|
(services, data_dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn maintainer_reconciles_index_when_provider_dimension_differs() {
|
||||||
|
let (services, data_dir) = services_with_index_provider_mismatch(3, 5).await;
|
||||||
|
|
||||||
|
prepare_embedding_runtime(&services, EmbeddingRuntimeRole::Maintainer)
|
||||||
|
.await
|
||||||
|
.expect("maintainer startup");
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
embedding_index_dimension(&services.db).await.expect("index dim"),
|
||||||
|
Some(5),
|
||||||
|
"maintainer should rebuild the index to the provider dimension"
|
||||||
|
);
|
||||||
|
|
||||||
|
tokio::fs::remove_dir_all(&data_dir).await.ok();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn read_only_startup_preserves_index_when_provider_dimension_differs() {
|
||||||
|
let (services, data_dir) = services_with_index_provider_mismatch(3, 5).await;
|
||||||
|
|
||||||
|
prepare_embedding_runtime(&services, EmbeddingRuntimeRole::ReadOnly)
|
||||||
|
.await
|
||||||
|
.expect("read-only startup");
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
embedding_index_dimension(&services.db).await.expect("index dim"),
|
||||||
|
Some(3),
|
||||||
|
"read-only server must not overwrite the index before a maintainer re-embeds"
|
||||||
|
);
|
||||||
|
|
||||||
|
tokio::fs::remove_dir_all(&data_dir).await.ok();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn maintainer_reembeds_chunks_when_index_dimension_differs() {
|
||||||
|
let (mut services, data_dir) = init_smoke_services()
|
||||||
|
.await
|
||||||
|
.expect("smoke services");
|
||||||
|
|
||||||
|
let mut settings = SystemSettings::get_current(&services.db)
|
||||||
|
.await
|
||||||
|
.expect("settings");
|
||||||
|
settings.embedding_dimensions = 3;
|
||||||
|
SystemSettings::update(&services.db, settings)
|
||||||
|
.await
|
||||||
|
.expect("settings at stored dimension");
|
||||||
|
services.embedding_provider =
|
||||||
|
Arc::new(EmbeddingProvider::new_hashed(3).expect("stored-dimension provider"));
|
||||||
|
|
||||||
|
ensure_runtime(&services.db, 3)
|
||||||
|
.await
|
||||||
|
.expect("seed index at stored dimension");
|
||||||
|
|
||||||
|
let chunk = TextChunk::new(
|
||||||
|
"reembed-src".into(),
|
||||||
|
"dimension migration test chunk".into(),
|
||||||
|
"user1".into(),
|
||||||
|
);
|
||||||
|
TextChunk::store_with_embedding(chunk, vec![0.1, 0.2, 0.3], &services.db)
|
||||||
|
.await
|
||||||
|
.expect("store chunk at old dimension");
|
||||||
|
|
||||||
|
let mut settings = SystemSettings::get_current(&services.db)
|
||||||
|
.await
|
||||||
|
.expect("settings");
|
||||||
|
settings.embedding_dimensions = 5;
|
||||||
|
SystemSettings::update(&services.db, settings)
|
||||||
|
.await
|
||||||
|
.expect("update settings to target dimension");
|
||||||
|
services.embedding_provider =
|
||||||
|
Arc::new(EmbeddingProvider::new_hashed(5).expect("target provider"));
|
||||||
|
|
||||||
|
prepare_embedding_runtime(&services, EmbeddingRuntimeRole::Maintainer)
|
||||||
|
.await
|
||||||
|
.expect("maintainer startup with data");
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
embedding_index_dimension(&services.db).await.expect("index dim"),
|
||||||
|
Some(5)
|
||||||
|
);
|
||||||
|
|
||||||
|
let rows: Vec<serde_json::Value> = services
|
||||||
|
.db
|
||||||
|
.client
|
||||||
|
.query("SELECT embedding FROM text_chunk_embedding;")
|
||||||
|
.await
|
||||||
|
.expect("query embeddings")
|
||||||
|
.take(0)
|
||||||
|
.expect("take rows");
|
||||||
|
let row = rows
|
||||||
|
.first()
|
||||||
|
.expect("exactly one embedding row after re-embed");
|
||||||
|
let embedding = row
|
||||||
|
.get("embedding")
|
||||||
|
.and_then(|v| v.as_array())
|
||||||
|
.expect("embedding array");
|
||||||
|
assert_eq!(
|
||||||
|
embedding.len(),
|
||||||
|
5,
|
||||||
|
"stored vectors should match the new provider dimension"
|
||||||
|
);
|
||||||
|
|
||||||
|
tokio::fs::remove_dir_all(&data_dir).await.ok();
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn reembed_lock_is_exclusive_and_reusable_after_release() {
|
async fn reembed_lock_is_exclusive_and_reusable_after_release() {
|
||||||
let db = test_db().await;
|
let db = test_db().await;
|
||||||
|
|||||||
Reference in New Issue
Block a user