diff --git a/CHANGELOG.md b/CHANGELOG.md index e2c3718..7415097 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased -- Search results are now selectable by which type, knowledge entity or ingested content +- Search results are now selectable by type: knowledge entity or ingested content +- The local embedding model can now be chosen via the admin UI +- Admin embedding changes are saved immediately; a restart is needed to re-embed. This simplifies the flow and may be improved in the future ## 1.0.2 (2026-02-15) - Fix: edge case where navigation back to a chat page could trigger a new response generation diff --git a/common/db/migrations/20260604_000001_fastembed_default_embedding_model.surql b/common/db/migrations/20260604_000001_fastembed_default_embedding_model.surql new file mode 100644 index 0000000..0964746 --- /dev/null +++ b/common/db/migrations/20260604_000001_fastembed_default_embedding_model.surql @@ -0,0 +1,8 @@ +-- Align persisted embedding settings when FastEmbed is the recorded backend but the model +-- name is still the OpenAI migration default (invalid for FastEmbed `from_str`). + +UPDATE system_settings:current SET + embedding_model = 'Xenova/bge-small-en-v1.5', + embedding_dimensions = 384 +WHERE embedding_backend = 'fastembed' + AND embedding_model = 'text-embedding-3-small'; diff --git a/common/src/utils/config.rs b/common/src/utils/config.rs index 0e07eae..540e7ec 100644 --- a/common/src/utils/config.rs +++ b/common/src/utils/config.rs @@ -117,6 +117,10 @@ pub struct AppConfig { pub fastembed_show_download_progress: Option, #[serde(default)] pub fastembed_max_length: Option, + /// HuggingFace-style FastEmbed `model_code` (e.g. `Xenova/bge-small-en-v1.5`). Overrides + /// `system_settings.embedding_model` when `embedding_backend` is `fastembed`. 
+ #[serde(default)] + pub fastembed_model: Option, #[serde(default)] pub embedding_backend: EmbeddingBackend, #[serde(default)] @@ -226,6 +230,7 @@ impl Default for AppConfig { fastembed_cache_dir: None, fastembed_show_download_progress: None, fastembed_max_length: None, + fastembed_model: None, embedding_backend: EmbeddingBackend::default(), embedding_pool_size: None, ingest_max_body_bytes: default_ingest_max_body_bytes(), diff --git a/common/src/utils/embedding.rs b/common/src/utils/embedding.rs index f02f2c5..cf2dfec 100644 --- a/common/src/utils/embedding.rs +++ b/common/src/utils/embedding.rs @@ -6,13 +6,16 @@ use std::{ thread::available_parallelism, }; +use serde::Serialize; +use tracing::warn; + use async_openai::{types::CreateEmbeddingRequestArgs, Client}; use fastembed::{EmbeddingModel, ModelTrait, TextEmbedding, TextInitOptions}; use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use crate::{ - error::EmbeddingError, - storage::types::system_settings::SystemSettings, + error::{AppError, EmbeddingError}, + storage::{db::SurrealDbClient, types::system_settings::SystemSettings}, utils::config::AppConfig, }; @@ -59,6 +62,152 @@ enum EmbeddingInner { /// progress logging while still amortising per-call lock/dispatch overhead. pub const RE_EMBED_BATCH_SIZE: usize = 128; +/// Default FastEmbed model (`BGESmallENV15`) when config and DB do not specify a valid code. +pub const DEFAULT_FASTEMBED_MODEL_CODE: &str = "Xenova/bge-small-en-v1.5"; + +/// A supported FastEmbed model for admin UI and documentation. +#[derive(Clone, Debug, Serialize)] +pub struct FastEmbedModelOption { + /// HuggingFace-style `model_code` accepted by [`EmbeddingModel::from_str`]. + pub model_code: String, + /// Fixed output dimension for this model. + pub dimension: u32, + /// Short human-readable description from fastembed metadata. + pub description: String, +} + +/// Lists supported FastEmbed text embedding models (sorted by `model_code`). 
+#[must_use] +pub fn list_fastembed_embedding_models() -> Vec { + let mut list: Vec = TextEmbedding::list_supported_models() + .into_iter() + .filter_map(|info| { + let dimension = u32::try_from(info.dim).ok()?; + Some(FastEmbedModelOption { + model_code: info.model_code, + dimension, + description: info.description, + }) + }) + .collect(); + list.sort_by(|left, right| left.model_code.cmp(&right.model_code)); + list +} + +/// Returns true when `code` is a supported FastEmbed `model_code` (HuggingFace-style id). +#[must_use] +pub fn is_valid_fastembed_model_code(code: &str) -> bool { + !code.trim().is_empty() && EmbeddingModel::from_str(code.trim()).is_ok() +} + +/// Vector dimension for a supported FastEmbed `model_code`. +/// +/// # Errors +/// +/// Returns [`EmbeddingError::UnknownModel`] when the code is not recognized. +pub fn fastembed_model_dimension(code: &str) -> Result { + let model = EmbeddingModel::from_str(code.trim()) + .map_err(|_| EmbeddingError::UnknownModel(unknown_fastembed_model_message(code)))?; + let dim = EmbeddingModel::get_model_info(&model) + .ok_or_else(|| { + EmbeddingError::Config(format!("fastembed model metadata missing for {code}")) + })? + .dim; + u32::try_from(dim).map_err(|_| { + EmbeddingError::Config(format!("fastembed model dimension {dim} exceeds u32::MAX")) + }) +} + +/// Resolves the FastEmbed model code to load: config override, then DB, then default. +/// +/// When `config.fastembed_model` is set it must be valid. When only the DB value is used and it +/// is not a FastEmbed code (e.g. legacy `text-embedding-3-small`), returns the default model. +/// +/// # Errors +/// +/// Returns [`EmbeddingError::UnknownModel`] if `config.fastembed_model` is set but invalid. 
+pub fn resolve_fastembed_model_code( + config: &AppConfig, + settings_model: &str, +) -> Result { + if let Some(code) = config.fastembed_model.as_deref() { + let trimmed = code.trim(); + if trimmed.is_empty() { + return Err(EmbeddingError::Config( + "fastembed_model must not be empty when set".into(), + )); + } + EmbeddingModel::from_str(trimmed) + .map_err(|_| EmbeddingError::UnknownModel(unknown_fastembed_model_message(trimmed)))?; + return Ok(trimmed.to_owned()); + } + + let trimmed = settings_model.trim(); + if is_valid_fastembed_model_code(trimmed) { + return Ok(trimmed.to_owned()); + } + + if !trimmed.is_empty() { + warn!( + stored_model = trimmed, + default_model = DEFAULT_FASTEMBED_MODEL_CODE, + "system_settings.embedding_model is not a FastEmbed model code; using default" + ); + } + + Ok(DEFAULT_FASTEMBED_MODEL_CODE.to_owned()) +} + +/// Persists a FastEmbed-compatible `embedding_model` and `embedding_dimensions` before startup +/// when the active backend is FastEmbed and stored settings still carry OpenAI defaults. +/// +/// # Errors +/// +/// Returns [`AppError`] if settings cannot be loaded, resolved, or updated. 
+pub async fn align_fastembed_system_settings( + db: &SurrealDbClient, + config: &AppConfig, +) -> Result { + if config.embedding_backend != EmbeddingBackend::FastEmbed { + return SystemSettings::get_current(db).await; + } + + let mut settings = SystemSettings::get_current(db).await?; + let resolved = resolve_fastembed_model_code(config, &settings.embedding_model)?; + let dimension = fastembed_model_dimension(&resolved)?; + + if settings.embedding_model == resolved && settings.embedding_dimensions == dimension { + return Ok(settings); + } + + tracing::info!( + old_model = %settings.embedding_model, + new_model = %resolved, + old_dimensions = settings.embedding_dimensions, + new_dimensions = dimension, + "Aligning system settings with FastEmbed model" + ); + settings.embedding_model = resolved; + settings.embedding_dimensions = dimension; + SystemSettings::update(db, settings).await +} + +fn unknown_fastembed_model_message(code: &str) -> String { + let mut codes: Vec = TextEmbedding::list_supported_models() + .into_iter() + .map(|info| info.model_code) + .collect(); + codes.sort(); + let examples: Vec<&str> = codes.iter().take(6).map(String::as_str).collect(); + format!( + "unknown FastEmbed model '{code}' (expected a HuggingFace model_code such as {}). \ + Set fastembed_model in config.yaml or update system_settings; \ + see docs/configuration.md ({count} models supported)", + examples.join(", "), + count = codes.len() + ) +} + /// Default FastEmbed pool size. /// /// Kept small on purpose: the ONNX runtime already uses intra-op threads per inference, so @@ -294,7 +443,8 @@ impl EmbeddingProvider { ) -> Result { let pool_size = pool_size.max(1); let model_name = if let Some(code) = model_override { - EmbeddingModel::from_str(&code).map_err(EmbeddingError::UnknownModel)? + EmbeddingModel::from_str(code.trim()) + .map_err(|_| EmbeddingError::UnknownModel(unknown_fastembed_model_message(&code)))? 
} else { EmbeddingModel::default() }; @@ -349,9 +499,10 @@ impl EmbeddingProvider { /// Creates an embedding provider from persisted settings and bootstrap config. /// - /// Model name and dimensions come from [`SystemSettings`]. The active backend is taken - /// from `config.embedding_backend` at startup; [`SystemSettings::sync_from_embedding_provider`] - /// persists the resolved backend to the database. + /// OpenAI/hashed model settings come from [`SystemSettings`]. FastEmbed uses + /// [`resolve_fastembed_model_code`] (config `fastembed_model` overrides DB). The active + /// backend is taken from `config.embedding_backend`; [`SystemSettings::sync_from_embedding_provider`] + /// persists the resolved backend to the database after startup. /// /// # Errors /// @@ -375,7 +526,8 @@ impl EmbeddingProvider { let pool_size = config .embedding_pool_size .unwrap_or_else(default_embedding_pool_size); - Self::new_fastembed(Some(settings.embedding_model.clone()), pool_size).await + let model_code = resolve_fastembed_model_code(config, &settings.embedding_model)?; + Self::new_fastembed(Some(model_code), pool_size).await } EmbeddingBackend::Hashed => { let dimension = usize::try_from(dimensions).map_err(|_| { @@ -433,7 +585,11 @@ fn bucket(token: &str, dimension: usize) -> usize { mod tests { #![allow(clippy::expect_used)] - use super::{EmbeddingBackend, ParseEmbeddingBackendError}; + use super::{ + align_fastembed_system_settings, fastembed_model_dimension, list_fastembed_embedding_models, + resolve_fastembed_model_code, DEFAULT_FASTEMBED_MODEL_CODE, EmbeddingError, + }; + use crate::utils::config::{AppConfig, EmbeddingBackend, ParseEmbeddingBackendError}; use crate::storage::types::system_settings::SystemSettings; use serde_json::json; @@ -483,6 +639,72 @@ mod tests { )); } + #[test] + fn list_fastembed_embedding_models_includes_default() { + let models = list_fastembed_embedding_models(); + assert!( + models + .iter() + .any(|m| m.model_code == 
DEFAULT_FASTEMBED_MODEL_CODE), + "catalog should include the default FastEmbed model" + ); + } + + #[test] + fn resolve_fastembed_model_prefers_config_over_db() { + let config = AppConfig { + fastembed_model: Some("Xenova/bge-base-en-v1.5".into()), + ..AppConfig::default() + }; + let resolved = resolve_fastembed_model_code(&config, "text-embedding-3-small") + .expect("config model"); + assert_eq!(resolved, "Xenova/bge-base-en-v1.5"); + } + + #[test] + fn resolve_fastembed_model_falls_back_from_openai_default() { + let config = AppConfig::default(); + let resolved = resolve_fastembed_model_code(&config, "text-embedding-3-small") + .expect("default model"); + assert_eq!(resolved, DEFAULT_FASTEMBED_MODEL_CODE); + } + + #[test] + fn resolve_fastembed_model_rejects_invalid_config_override() { + let config = AppConfig { + fastembed_model: Some("not-a-real-model".into()), + ..AppConfig::default() + }; + let err = resolve_fastembed_model_code(&config, "Xenova/bge-small-en-v1.5") + .expect_err("invalid config model"); + assert!(matches!(err, EmbeddingError::UnknownModel(_))); + } + + #[test] + fn fastembed_model_dimension_matches_model_metadata() { + let dim = fastembed_model_dimension(DEFAULT_FASTEMBED_MODEL_CODE).expect("dim"); + assert_eq!(dim, 384); + } + + #[tokio::test] + async fn align_fastembed_system_settings_replaces_openai_default() -> anyhow::Result<()> { + use crate::storage::db::SurrealDbClient; + use uuid::Uuid; + + let db = SurrealDbClient::memory("align_fe", &Uuid::new_v4().to_string()).await?; + db.apply_migrations().await?; + + let config = AppConfig { + embedding_backend: EmbeddingBackend::FastEmbed, + ..AppConfig::default() + }; + + let settings = align_fastembed_system_settings(&db, &config).await?; + assert_eq!(settings.embedding_model, DEFAULT_FASTEMBED_MODEL_CODE); + assert_eq!(settings.embedding_dimensions, 384); + Ok(()) + } + #[test] fn system_settings_deserializes_embedding_backend_field() { let value = json!({ diff --git a/docs/configuration.md 
b/docs/configuration.md index 559059b..a453050 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -24,7 +24,8 @@ Minne can be configured via environment variables or a `config.yaml` file. Envir | `RUST_LOG` | Logging level | `info` | | `STORAGE` | Storage backend (`local`, `memory`, `s3`) | `local` | | `PDF_INGEST_MODE` | PDF ingestion strategy (`classic`, `llm-first`) | `llm-first` | -| `EMBEDDING_BACKEND` | Embedding provider (`openai`, `fastembed`) | `fastembed` | +| `EMBEDDING_BACKEND` | Embedding provider (`openai`, `fastembed`, `hashed`) | `fastembed` | +| `FASTEMBED_MODEL` | FastEmbed HuggingFace `model_code`; when set, overrides the DB/admin UI setting | unset (DB setting, else `Xenova/bge-small-en-v1.5`) | | `FASTEMBED_CACHE_DIR` | Model cache directory | `/fastembed` | | `FASTEMBED_SHOW_DOWNLOAD_PROGRESS` | Show progress bar for model downloads | `false` | | `FASTEMBED_MAX_LENGTH` | Max sequence length for FastEmbed models | - | @@ -76,6 +77,8 @@ storage: "local" # s3_region: "us-east-1" pdf_ingest_mode: "llm-first" embedding_backend: "fastembed" +# Optional HuggingFace model_code (see fastembed docs); dimensions are fixed per model. When set, it overrides the admin UI choice. +# fastembed_model: "Xenova/bge-small-en-v1.5" # Optional reranking reranking_enabled: true diff --git a/html-router/assets/style.css b/html-router/assets/style.css index 3431eb7..57fbde4 100644 --- a/html-router/assets/style.css +++ b/html-router/assets/style.css @@ -2027,6 +2027,12 @@ .border-base-200 { border-color: var(--color-base-200); } + .border-base-content\/10 { + border-color: var(--color-base-content); + @supports (color: color-mix(in lab, red, red)) { + border-color: color-mix(in oklab, var(--color-base-content) 10%, transparent); + } + } .border-error { border-color: var(--color-error); } @@ -2048,6 +2054,12 @@ .bg-base-200 { background-color: var(--color-base-200); } + .bg-base-200\/40 { + background-color: var(--color-base-200); + @supports (color: color-mix(in lab, red, red)) { + background-color: color-mix(in oklab, var(--color-base-200) 40%, 
transparent); + } + } .bg-transparent { background-color: transparent; } @@ -2334,6 +2346,9 @@ .opacity-80 { opacity: 80%; } + .opacity-90 { + opacity: 90%; + } .opacity-100 { opacity: 100%; } diff --git a/html-router/src/routes/admin/handlers.rs b/html-router/src/routes/admin/handlers.rs index 2fda03f..0c328ed 100644 --- a/html-router/src/routes/admin/handlers.rs +++ b/html-router/src/routes/admin/handlers.rs @@ -15,7 +15,13 @@ use common::{ }, system_settings::{SystemSettings, SystemSettingsPatch}, }, - utils::embedding::EmbeddingBackend, + utils::{ + config::AppConfig, + embedding::{ + fastembed_model_dimension, is_valid_fastembed_model_code, list_fastembed_embedding_models, + EmbeddingBackend, FastEmbedModelOption, + }, + }, }; use tracing::info; @@ -32,6 +38,9 @@ pub struct AdminPanelData { default_query_prompt: String, default_image_prompt: String, available_models: Option, + fastembed_models: Option>, + fastembed_model_locked_by_config: bool, + effective_embedding_backend: String, current_section: AdminSection, } @@ -70,18 +79,30 @@ pub async fn show_admin_panel( (None, None) }; - let available_models = if section == AdminSection::Models { - Some( - state - .openai_client - .models() - .list() - .await - .map_err(|e| AppError::InternalError(e.to_string()))?, - ) - } else { - None - }; + let (available_models, fastembed_models, fastembed_model_locked_by_config) = + if section == AdminSection::Models { + let available_models = Some( + state + .openai_client + .models() + .list() + .await + .map_err(|e| AppError::InternalError(e.to_string()))?, + ); + let fastembed_models = is_fastembed_admin_context(&settings, &state.config) + .then(list_fastembed_embedding_models); + let fastembed_model_locked_by_config = state.config.fastembed_model.is_some(); + ( + available_models, + fastembed_models, + fastembed_model_locked_by_config, + ) + } else { + (None, None, false) + }; + + let effective_backend = + effective_embedding_backend(&settings, 
&state.config).as_str().to_string(); Ok(TemplateResponse::new_template( "admin/base.html", @@ -89,6 +110,9 @@ pub async fn show_admin_panel( settings, analytics, available_models, + fastembed_models, + fastembed_model_locked_by_config, + effective_embedding_backend: effective_backend, users, default_query_prompt: DEFAULT_QUERY_SYSTEM_PROMPT.to_string(), default_image_prompt: DEFAULT_IMAGE_PROCESSING_PROMPT.to_string(), @@ -150,6 +174,100 @@ pub struct ModelSettingsInput { pub struct ModelSettingsData { settings: SystemSettings, available_models: ListModelResponse, + fastembed_models: Option>, + fastembed_model_locked_by_config: bool, + effective_embedding_backend: String, +} + +struct EmbeddingSettingsPlan { + embedding_model: String, + embedding_dimensions: u32, + reembedding_needed: bool, + restart_needed: bool, +} + +fn effective_embedding_backend(settings: &SystemSettings, config: &AppConfig) -> EmbeddingBackend { + settings.embedding_backend.unwrap_or(config.embedding_backend) +} + +fn is_fastembed_admin_context(settings: &SystemSettings, config: &AppConfig) -> bool { + effective_embedding_backend(settings, config) == EmbeddingBackend::FastEmbed +} + +fn plan_embedding_settings_update( + current: &SystemSettings, + input: &ModelSettingsInput, + config: &AppConfig, +) -> Result { + match effective_embedding_backend(current, config) { + EmbeddingBackend::OpenAI => { + let reembedding_needed = input + .embedding_dimensions + .is_some_and(|new_dims| new_dims != current.embedding_dimensions); + let embedding_model = input + .embedding_model + .clone() + .unwrap_or_else(|| current.embedding_model.clone()); + let embedding_dimensions = input + .embedding_dimensions + .unwrap_or(current.embedding_dimensions); + Ok(EmbeddingSettingsPlan { + embedding_model, + embedding_dimensions, + reembedding_needed, + restart_needed: reembedding_needed, + }) + } + EmbeddingBackend::FastEmbed => { + if config.fastembed_model.is_some() { + return Ok(EmbeddingSettingsPlan { + 
embedding_model: current.embedding_model.clone(), + embedding_dimensions: current.embedding_dimensions, + reembedding_needed: false, + restart_needed: false, + }); + } + + let embedding_model = input + .embedding_model + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + .unwrap_or_else(|| current.embedding_model.clone()); + + if !is_valid_fastembed_model_code(&embedding_model) { + return Err(AppError::Validation(format!( + "Unknown FastEmbed model '{embedding_model}'. Choose a model from the list." + ))); + } + + let embedding_dimensions = fastembed_model_dimension(&embedding_model) + .map_err(AppError::from)?; + let reembedding_needed = embedding_dimensions != current.embedding_dimensions; + let restart_needed = + embedding_model != current.embedding_model || reembedding_needed; + + Ok(EmbeddingSettingsPlan { + embedding_model, + embedding_dimensions, + reembedding_needed, + restart_needed, + }) + } + EmbeddingBackend::Hashed => { + info!( + backend = ?current.embedding_backend, + "Embedding model/dimensions for hashed backend are controlled by config" + ); + Ok(EmbeddingSettingsPlan { + embedding_model: current.embedding_model.clone(), + embedding_dimensions: current.embedding_dimensions, + reembedding_needed: false, + restart_needed: false, + }) + } + } } pub async fn update_model_settings( @@ -157,63 +275,32 @@ pub async fn update_model_settings( Form(input): Form, ) -> TemplateResult { let current_settings = SystemSettings::get_current(&state.db).await?; - - // Check if using FastEmbed - if so, embedding model/dimensions cannot be changed via UI - let uses_local_embeddings = current_settings.embedding_backend.is_some_and( - |backend| matches!(backend, EmbeddingBackend::FastEmbed | EmbeddingBackend::Hashed), - ); - - // For local embeddings, ignore any embedding model/dimension changes from the form - let (final_embedding_model, final_embedding_dimensions, reembedding_needed) = - if uses_local_embeddings { - // Keep 
current values - they're controlled by config, not the admin UI - info!( - backend = ?current_settings.embedding_backend, - "Embedding model/dimensions controlled by config, ignoring form input" - ); - ( - current_settings.embedding_model.clone(), - current_settings.embedding_dimensions, - false, - ) - } else { - // OpenAI backend - allow changes from form - let reembedding_needed = input - .embedding_dimensions - .is_some_and(|new_dims| new_dims != current_settings.embedding_dimensions); - ( - input - .embedding_model - .unwrap_or_else(|| current_settings.embedding_model.clone()), - input - .embedding_dimensions - .unwrap_or(current_settings.embedding_dimensions), - reembedding_needed, - ) - }; + let embedding_plan = + plan_embedding_settings_update(¤t_settings, &input, &state.config)?; let new_settings = SystemSettingsPatch { query_model: Some(input.query_model), processing_model: Some(input.processing_model), image_processing_model: Some(input.image_processing_model), voice_processing_model: Some(input.voice_processing_model), - embedding_model: Some(final_embedding_model), - embedding_dimensions: Some(final_embedding_dimensions), + embedding_model: Some(embedding_plan.embedding_model), + embedding_dimensions: Some(embedding_plan.embedding_dimensions), ..Default::default() } .apply(&state.db) .await?; - if reembedding_needed { + if embedding_plan.reembedding_needed { // Re-embedding is owned by startup (the worker/combined binary), not the admin request. - // Doing it inline here would leave the live, startup-built embedding provider embedding - // queries at the old dimension while stored vectors move to the new one — broken retrieval - // until restart. Persisting the new settings is enough: on the next restart the maintainer - // detects the index/dimension mismatch and re-embeds before rebuilding indexes. 
info!( new_dimensions = new_settings.embedding_dimensions, "Embedding dimensions changed; restart the worker/server to re-embed and apply" ); + } else if embedding_plan.restart_needed { + info!( + new_model = %new_settings.embedding_model, + "Embedding model changed; restart the worker/server to apply" + ); } let available_models = state @@ -223,16 +310,98 @@ pub async fn update_model_settings( .await .map_err(|_e| AppError::InternalError("Failed to get models".to_string()))?; + let effective_backend = + effective_embedding_backend(&new_settings, &state.config).as_str().to_string(); + let show_fastembed_models = + is_fastembed_admin_context(&new_settings, &state.config).then(list_fastembed_embedding_models); + Ok(TemplateResponse::new_partial( "admin/sections/models.html", "model_settings_form", ModelSettingsData { settings: new_settings, available_models, + fastembed_models: show_fastembed_models, + fastembed_model_locked_by_config: state.config.fastembed_model.is_some(), + effective_embedding_backend: effective_backend, }, )) } +#[cfg(test)] +mod tests { + #![allow(clippy::expect_used)] + + use super::*; + use common::utils::config::AppConfig; + + fn openai_settings() -> SystemSettings { + SystemSettings { + id: "current".into(), + registrations_enabled: true, + require_email_verification: false, + query_model: "gpt-4o-mini".into(), + processing_model: "gpt-4o-mini".into(), + embedding_model: "text-embedding-3-small".into(), + embedding_dimensions: 1536, + embedding_backend: Some(EmbeddingBackend::OpenAI), + query_system_prompt: "q".into(), + ingestion_system_prompt: "i".into(), + image_processing_model: "gpt-4o-mini".into(), + image_processing_prompt: "p".into(), + voice_processing_model: "whisper-1".into(), + } + } + + #[test] + fn plan_fastembed_update_sets_dimensions_from_model_metadata() { + let current = SystemSettings { + embedding_backend: Some(EmbeddingBackend::FastEmbed), + embedding_model: "Xenova/bge-small-en-v1.5".into(), + embedding_dimensions: 384, 
+ ..openai_settings() + }; + let input = ModelSettingsInput { + query_model: current.query_model.clone(), + processing_model: current.processing_model.clone(), + image_processing_model: current.image_processing_model.clone(), + voice_processing_model: current.voice_processing_model.clone(), + embedding_model: Some("Xenova/bge-base-en-v1.5".into()), + embedding_dimensions: None, + }; + let plan = plan_embedding_settings_update(¤t, &input, &AppConfig::default()) + .expect("plan"); + assert_eq!(plan.embedding_model, "Xenova/bge-base-en-v1.5"); + assert_eq!(plan.embedding_dimensions, 768); + assert!(plan.reembedding_needed); + assert!(plan.restart_needed); + } + + #[test] + fn plan_fastembed_ignores_form_when_config_overrides_model() { + let current = SystemSettings { + embedding_backend: Some(EmbeddingBackend::FastEmbed), + ..openai_settings() + }; + let input = ModelSettingsInput { + query_model: current.query_model.clone(), + processing_model: current.processing_model.clone(), + image_processing_model: current.image_processing_model.clone(), + voice_processing_model: current.voice_processing_model.clone(), + embedding_model: Some("Xenova/bge-large-en-v1.5".into()), + embedding_dimensions: None, + }; + let config = AppConfig { + embedding_backend: EmbeddingBackend::FastEmbed, + fastembed_model: Some("Xenova/bge-small-en-v1.5".into()), + ..AppConfig::default() + }; + let plan = plan_embedding_settings_update(¤t, &input, &config).expect("plan"); + assert_eq!(plan.embedding_model, current.embedding_model); + assert!(!plan.restart_needed); + } +} + #[derive(Serialize)] pub struct SystemPromptEditData { settings: SystemSettings, diff --git a/html-router/templates/admin/sections/models.html b/html-router/templates/admin/sections/models.html index 5d8e18c..905a317 100644 --- a/html-router/templates/admin/sections/models.html +++ b/html-router/templates/admin/sections/models.html @@ -4,7 +4,8 @@
AI Models

Model configuration

- Choose which models power conversational search, ingestion analysis, and embeddings. Adjusting embeddings may trigger a full reprocess. + Choose which models power conversational search, ingestion analysis, and embeddings. + Embedding dimension changes apply after you restart the worker or server.

Embedding Model
- {% if settings.embedding_backend == "fastembed" or settings.embedding_backend == "hashed" %} + {% if effective_embedding_backend == "fastembed" %} + {% if fastembed_model_locked_by_config %} + +

+ Overridden at startup by fastembed_model (set in config.yaml or via the FASTEMBED_MODEL environment variable). Remove that setting to manage the model here.

+ {% else %} + +

+ Save, then restart the worker or server to load the new model. First run may download weights. +

+ {% endif %} + {% elif effective_embedding_backend == "hashed" %} -

- Model: {{ settings.embedding_model }} ({{ settings.embedding_dimensions }} dims) -

- Info: Embedding model is controlled by config when using {{ settings.embedding_backend }} backend. + Hashed embeddings use embedding_dimensions from config, not the admin UI.

{% else %} +

+ Fixed by the selected FastEmbed model. A dimension change triggers a full re-embed after restart. +

+ {% elif effective_embedding_backend == "hashed" %}

- Info: Dimensions are fixed for {{ settings.embedding_backend }} backend. Set - EMBEDDING_BACKEND=openai to use OpenAI embeddings. + Set EMBEDDING_BACKEND=openai to use OpenAI embeddings, or set embedding_dimensions in config for the hashed backend.

{% else %} -

Changing dimensions will trigger a background re-embedding.

+

+ Saving a new dimension updates settings only. Restart the worker (or combined app) to re-embed stored data and rebuild indexes. +

{% endif %}
- {% if settings.embedding_backend != "fastembed" and settings.embedding_backend != "hashed" %} + {% if effective_embedding_backend == "fastembed" and not fastembed_model_locked_by_config %} +
+

+ FastEmbed: The running process keeps the model loaded until restart. Changing to a model with a + different dimension re-embeds all stored vectors on the next worker/server startup. +

+

+ Same-dimension model swaps update settings only: vectors already stored were produced by the previous model and are not + regenerated until the dimension changes (or until you force a re-embed via the OpenAI workaround described in the ops docs).

+
+ + + {% endif %} + + {% if effective_embedding_backend != "fastembed" and effective_embedding_backend != "hashed" %} +
+

+ Re-embedding stored data: Only a change to embedding_dimensions + followed by a restart triggers a full re-embed of text chunks and knowledge entities. Changing the embedding model alone + does not update vectors already in the database. +

+

+ To force a full re-embed (for example after switching models): save a different dimension value, restart the + worker, then set the final dimension and model and restart again if needed.

+
+ {% endif %} @@ -137,7 +203,26 @@ - {% if settings.embedding_backend != "fastembed" and settings.embedding_backend != "hashed" %} + {% if effective_embedding_backend == "fastembed" and not fastembed_model_locked_by_config %} + + {% endif %} + + {% if effective_embedding_backend != "fastembed" and effective_embedding_backend != "hashed" %}