feat: configure FastEmbed model in config and admin, with restart to apply

Expose fastembed_model in config and a model dropdown on Admin → Models.
Persist dimension from the chosen model, require restart to load it, and
align legacy OpenAI default settings so fresh local-embedding installs
start cleanly.
This commit is contained in:
Per Stark
2026-06-04 21:48:12 +02:00
parent 15c9f18f6e
commit 4e20da538d
10 changed files with 735 additions and 82 deletions
+5
View File
@@ -117,6 +117,10 @@ pub struct AppConfig {
pub fastembed_show_download_progress: Option<bool>,
#[serde(default)]
pub fastembed_max_length: Option<usize>,
/// HuggingFace-style FastEmbed `model_code` (e.g. `Xenova/bge-small-en-v1.5`). Overrides
/// `system_settings.embedding_model` when `embedding_backend` is `fastembed`.
#[serde(default)]
pub fastembed_model: Option<String>,
#[serde(default)]
pub embedding_backend: EmbeddingBackend,
#[serde(default)]
@@ -226,6 +230,7 @@ impl Default for AppConfig {
fastembed_cache_dir: None,
fastembed_show_download_progress: None,
fastembed_max_length: None,
fastembed_model: None,
embedding_backend: EmbeddingBackend::default(),
embedding_pool_size: None,
ingest_max_body_bytes: default_ingest_max_body_bytes(),
+230 -8
View File
@@ -6,13 +6,16 @@ use std::{
thread::available_parallelism,
};
use serde::Serialize;
use tracing::warn;
use async_openai::{types::CreateEmbeddingRequestArgs, Client};
use fastembed::{EmbeddingModel, ModelTrait, TextEmbedding, TextInitOptions};
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
use crate::{
error::EmbeddingError,
storage::types::system_settings::SystemSettings,
error::{AppError, EmbeddingError},
storage::{db::SurrealDbClient, types::system_settings::SystemSettings},
utils::config::AppConfig,
};
@@ -59,6 +62,152 @@ enum EmbeddingInner {
/// progress logging while still amortising per-call lock/dispatch overhead.
pub const RE_EMBED_BATCH_SIZE: usize = 128;
/// Default FastEmbed model (`BGESmallENV15`) when config and DB do not specify a valid code.
///
/// [`resolve_fastembed_model_code`] returns this value when `config.fastembed_model` is unset
/// and the stored settings model is empty or not a recognized FastEmbed `model_code`.
pub const DEFAULT_FASTEMBED_MODEL_CODE: &str = "Xenova/bge-small-en-v1.5";
/// A supported FastEmbed model for admin UI and documentation.
///
/// Instances are built by [`list_fastembed_embedding_models`] from the metadata that fastembed
/// reports for each supported text embedding model.
#[derive(Clone, Debug, Serialize)]
pub struct FastEmbedModelOption {
    /// HuggingFace-style `model_code` accepted by [`EmbeddingModel::from_str`].
    pub model_code: String,
    /// Fixed output dimension for this model (fastembed's `dim`, converted to `u32`).
    pub dimension: u32,
    /// Short human-readable description from fastembed metadata.
    pub description: String,
}
/// Returns the catalog of FastEmbed text embedding models, ordered by `model_code`.
///
/// Each entry carries the model code, its output dimension, and the description reported by
/// fastembed. A model whose dimension cannot be represented as a `u32` is left out of the
/// catalog instead of causing a failure.
#[must_use]
pub fn list_fastembed_embedding_models() -> Vec<FastEmbedModelOption> {
    let mut options: Vec<FastEmbedModelOption> = Vec::new();
    for info in TextEmbedding::list_supported_models() {
        // Skip entries whose dimension does not fit in `u32`.
        if let Ok(dimension) = u32::try_from(info.dim) {
            options.push(FastEmbedModelOption {
                model_code: info.model_code,
                dimension,
                description: info.description,
            });
        }
    }
    options.sort_by(|a, b| a.model_code.cmp(&b.model_code));
    options
}
/// Reports whether `code` names a supported FastEmbed model (HuggingFace-style `model_code`).
///
/// Leading and trailing whitespace is ignored. An empty or whitespace-only string is never
/// valid.
#[must_use]
pub fn is_valid_fastembed_model_code(code: &str) -> bool {
    let candidate = code.trim();
    if candidate.is_empty() {
        return false;
    }
    EmbeddingModel::from_str(candidate).is_ok()
}
/// Vector dimension for a supported FastEmbed `model_code`.
///
/// Surrounding whitespace in `code` is ignored. Error messages quote the trimmed code, i.e. the
/// exact value that was looked up, matching how [`resolve_fastembed_model_code`] reports
/// unknown models.
///
/// # Errors
///
/// - [`EmbeddingError::UnknownModel`] when the code is not recognized.
/// - [`EmbeddingError::Config`] when fastembed has no metadata for the parsed model, or when
///   the reported dimension does not fit in a `u32`.
pub fn fastembed_model_dimension(code: &str) -> Result<u32, EmbeddingError> {
    // Normalize once so the lookup and every error message refer to the same string.
    let code = code.trim();
    let model = EmbeddingModel::from_str(code)
        .map_err(|_| EmbeddingError::UnknownModel(unknown_fastembed_model_message(code)))?;
    let dim = EmbeddingModel::get_model_info(&model)
        .ok_or_else(|| {
            EmbeddingError::Config(format!("fastembed model metadata missing for {code}"))
        })?
        .dim;
    u32::try_from(dim).map_err(|_| {
        EmbeddingError::Config(format!("fastembed model dimension {dim} exceeds u32::MAX"))
    })
}
/// Picks the FastEmbed model code to load, in priority order: config override, stored
/// settings, built-in default.
///
/// - If `config.fastembed_model` is set, its trimmed value is returned and must be a supported
///   model code.
/// - Otherwise the trimmed `settings_model` is returned when it is a supported model code.
/// - Otherwise [`DEFAULT_FASTEMBED_MODEL_CODE`] is returned. A warning is logged when the
///   stored value was non-empty but not a FastEmbed code (e.g. legacy
///   `text-embedding-3-small`).
///
/// # Errors
///
/// - [`EmbeddingError::Config`] if `config.fastembed_model` is set but empty after trimming.
/// - [`EmbeddingError::UnknownModel`] if `config.fastembed_model` is set but not a supported
///   model code.
pub fn resolve_fastembed_model_code(
    config: &AppConfig,
    settings_model: &str,
) -> Result<String, EmbeddingError> {
    // An explicit config override always wins and is never silently replaced.
    if let Some(override_code) = config.fastembed_model.as_deref().map(str::trim) {
        if override_code.is_empty() {
            return Err(EmbeddingError::Config(
                "fastembed_model must not be empty when set".into(),
            ));
        }
        return match EmbeddingModel::from_str(override_code) {
            Ok(_) => Ok(override_code.to_owned()),
            Err(_) => Err(EmbeddingError::UnknownModel(
                unknown_fastembed_model_message(override_code),
            )),
        };
    }

    let stored = settings_model.trim();
    if stored.is_empty() {
        // Nothing stored: fall back quietly.
        return Ok(DEFAULT_FASTEMBED_MODEL_CODE.to_owned());
    }
    if is_valid_fastembed_model_code(stored) {
        return Ok(stored.to_owned());
    }

    // A stored value exists but belongs to another backend; note the substitution.
    warn!(
        stored_model = stored,
        default_model = DEFAULT_FASTEMBED_MODEL_CODE,
        "system_settings.embedding_model is not a FastEmbed model code; using default"
    );
    Ok(DEFAULT_FASTEMBED_MODEL_CODE.to_owned())
}
/// Brings the stored `embedding_model` and `embedding_dimensions` in line with the FastEmbed
/// model that will be loaded.
///
/// For any backend other than FastEmbed the current settings are returned untouched. For
/// FastEmbed, the model code is resolved via [`resolve_fastembed_model_code`] and its dimension
/// via [`fastembed_model_dimension`]. If either differs from what is stored (for example when
/// the settings still carry OpenAI defaults), both fields are updated and persisted.
///
/// # Errors
///
/// Returns [`AppError`] if settings cannot be loaded, resolved, or updated.
pub async fn align_fastembed_system_settings(
    db: &SurrealDbClient,
    config: &AppConfig,
) -> Result<SystemSettings, AppError> {
    let mut current = SystemSettings::get_current(db).await?;
    if config.embedding_backend != EmbeddingBackend::FastEmbed {
        return Ok(current);
    }

    let model_code = resolve_fastembed_model_code(config, &current.embedding_model)?;
    let dimensions = fastembed_model_dimension(&model_code)?;

    let already_aligned =
        current.embedding_model == model_code && current.embedding_dimensions == dimensions;
    if already_aligned {
        return Ok(current);
    }

    tracing::info!(
        old_model = %current.embedding_model,
        new_model = %model_code,
        old_dimensions = current.embedding_dimensions,
        new_dimensions = dimensions,
        "Aligning system settings with FastEmbed model"
    );

    current.embedding_model = model_code;
    current.embedding_dimensions = dimensions;
    SystemSettings::update(db, current).await
}
/// Builds the user-facing message for an unrecognized FastEmbed model code.
///
/// The message quotes `code`, lists the first six supported model codes in sorted order as
/// examples, and states how many models are supported in total.
fn unknown_fastembed_model_message(code: &str) -> String {
    let mut supported: Vec<String> = Vec::new();
    for info in TextEmbedding::list_supported_models() {
        supported.push(info.model_code);
    }
    supported.sort();

    let count = supported.len();
    let examples = supported
        .iter()
        .take(6)
        .map(String::as_str)
        .collect::<Vec<&str>>()
        .join(", ");

    format!(
        "unknown FastEmbed model '{code}' (expected a HuggingFace model_code such as {examples}). \
         Set fastembed_model in config.yaml or update system_settings; \
         see docs/configuration.md ({count} models supported)"
    )
}
/// Default FastEmbed pool size.
///
/// Kept small on purpose: the ONNX runtime already uses intra-op threads per inference, so
@@ -294,7 +443,8 @@ impl EmbeddingProvider {
) -> Result<Self, EmbeddingError> {
let pool_size = pool_size.max(1);
let model_name = if let Some(code) = model_override {
EmbeddingModel::from_str(&code).map_err(EmbeddingError::UnknownModel)?
EmbeddingModel::from_str(code.trim())
.map_err(|_| EmbeddingError::UnknownModel(unknown_fastembed_model_message(&code)))?
} else {
EmbeddingModel::default()
};
@@ -349,9 +499,10 @@ impl EmbeddingProvider {
/// Creates an embedding provider from persisted settings and bootstrap config.
///
/// Model name and dimensions come from [`SystemSettings`]. The active backend is taken
/// from `config.embedding_backend` at startup; [`SystemSettings::sync_from_embedding_provider`]
/// persists the resolved backend to the database.
/// OpenAI/hashed model settings come from [`SystemSettings`]. FastEmbed uses
/// [`resolve_fastembed_model_code`] (config `fastembed_model` overrides DB). The active
/// backend is taken from `config.embedding_backend`; [`SystemSettings::sync_from_embedding_provider`]
/// persists the resolved backend to the database after startup.
///
/// # Errors
///
@@ -375,7 +526,8 @@ impl EmbeddingProvider {
let pool_size = config
.embedding_pool_size
.unwrap_or_else(default_embedding_pool_size);
Self::new_fastembed(Some(settings.embedding_model.clone()), pool_size).await
let model_code = resolve_fastembed_model_code(config, &settings.embedding_model)?;
Self::new_fastembed(Some(model_code), pool_size).await
}
EmbeddingBackend::Hashed => {
let dimension = usize::try_from(dimensions).map_err(|_| {
@@ -433,7 +585,11 @@ fn bucket(token: &str, dimension: usize) -> usize {
mod tests {
#![allow(clippy::expect_used)]
use super::{EmbeddingBackend, ParseEmbeddingBackendError};
use super::{
align_fastembed_system_settings, fastembed_model_dimension, list_fastembed_embedding_models,
resolve_fastembed_model_code, DEFAULT_FASTEMBED_MODEL_CODE, EmbeddingError,
};
use crate::utils::config::{AppConfig, EmbeddingBackend, ParseEmbeddingBackendError};
use crate::storage::types::system_settings::SystemSettings;
use serde_json::json;
@@ -483,6 +639,72 @@ mod tests {
));
}
/// The model catalog must contain the built-in default model code.
#[test]
fn list_fastembed_embedding_models_includes_default() {
    let has_default = list_fastembed_embedding_models()
        .iter()
        .any(|option| option.model_code == DEFAULT_FASTEMBED_MODEL_CODE);
    assert!(
        has_default,
        "catalog should include the default FastEmbed model"
    );
}
/// A valid `fastembed_model` in config wins over whatever the stored settings contain.
#[test]
fn resolve_fastembed_model_prefers_config_over_db() {
    let override_code = "Xenova/bge-base-en-v1.5";
    let config = AppConfig {
        fastembed_model: Some(override_code.into()),
        ..AppConfig::default()
    };

    let outcome = resolve_fastembed_model_code(&config, "text-embedding-3-small");

    assert_eq!(outcome.expect("config model"), "Xenova/bge-base-en-v1.5");
}
/// Without a config override, a stored OpenAI model name resolves to the default FastEmbed
/// model.
#[test]
fn resolve_fastembed_model_falls_back_from_openai_default() {
    let outcome = resolve_fastembed_model_code(&AppConfig::default(), "text-embedding-3-small");

    assert_eq!(outcome.expect("default model"), DEFAULT_FASTEMBED_MODEL_CODE);
}
/// An unrecognized `fastembed_model` in config is an error even when the stored model is valid.
#[test]
fn resolve_fastembed_model_rejects_invalid_config_override() {
    let bad_config = AppConfig {
        fastembed_model: Some("not-a-real-model".into()),
        ..AppConfig::default()
    };

    let failure = resolve_fastembed_model_code(&bad_config, "Xenova/bge-small-en-v1.5")
        .expect_err("invalid config model");

    assert!(matches!(failure, EmbeddingError::UnknownModel(_)));
}
/// The default model is expected to report a 384-dimensional output.
#[test]
fn fastembed_model_dimension_matches_model_metadata() {
    let reported = fastembed_model_dimension(DEFAULT_FASTEMBED_MODEL_CODE);

    assert_eq!(reported.expect("dim"), 384);
}
/// On a freshly migrated in-memory database with the FastEmbed backend selected, alignment
/// yields the default FastEmbed model and its 384 dimensions.
#[tokio::test]
async fn align_fastembed_system_settings_replaces_openai_default() -> anyhow::Result<()> {
    use crate::storage::db::SurrealDbClient;
    use uuid::Uuid;

    // A random identifier keeps this in-memory instance separate from other tests.
    let unique_id = Uuid::new_v4().to_string();
    let db = SurrealDbClient::memory("align_fe", &unique_id).await?;
    db.apply_migrations().await?;

    let fastembed_config = AppConfig {
        embedding_backend: EmbeddingBackend::FastEmbed,
        ..AppConfig::default()
    };
    let aligned = align_fastembed_system_settings(&db, &fastembed_config).await?;

    assert_eq!(aligned.embedding_model, DEFAULT_FASTEMBED_MODEL_CODE);
    assert_eq!(aligned.embedding_dimensions, 384);
    Ok(())
}
#[test]
fn system_settings_deserializes_embedding_backend_field() {
let value = json!({