mirror of
https://github.com/perstarkse/minne.git
synced 2026-06-18 04:29:34 +02:00
feat: configure FastEmbed model in config and admin, with restart to apply
Expose fastembed_model in config and a model dropdown on Admin → Models. Persist dimension from the chosen model, require restart to load it, and align legacy OpenAI default settings so fresh local-embedding installs start cleanly.
This commit is contained in:
@@ -117,6 +117,10 @@ pub struct AppConfig {
|
||||
pub fastembed_show_download_progress: Option<bool>,
|
||||
#[serde(default)]
|
||||
pub fastembed_max_length: Option<usize>,
|
||||
/// HuggingFace-style FastEmbed `model_code` (e.g. `Xenova/bge-small-en-v1.5`). Overrides
|
||||
/// `system_settings.embedding_model` when `embedding_backend` is `fastembed`.
|
||||
#[serde(default)]
|
||||
pub fastembed_model: Option<String>,
|
||||
#[serde(default)]
|
||||
pub embedding_backend: EmbeddingBackend,
|
||||
#[serde(default)]
|
||||
@@ -226,6 +230,7 @@ impl Default for AppConfig {
|
||||
fastembed_cache_dir: None,
|
||||
fastembed_show_download_progress: None,
|
||||
fastembed_max_length: None,
|
||||
fastembed_model: None,
|
||||
embedding_backend: EmbeddingBackend::default(),
|
||||
embedding_pool_size: None,
|
||||
ingest_max_body_bytes: default_ingest_max_body_bytes(),
|
||||
|
||||
@@ -6,13 +6,16 @@ use std::{
|
||||
thread::available_parallelism,
|
||||
};
|
||||
|
||||
use serde::Serialize;
|
||||
use tracing::warn;
|
||||
|
||||
use async_openai::{types::CreateEmbeddingRequestArgs, Client};
|
||||
use fastembed::{EmbeddingModel, ModelTrait, TextEmbedding, TextInitOptions};
|
||||
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
|
||||
|
||||
use crate::{
|
||||
error::EmbeddingError,
|
||||
storage::types::system_settings::SystemSettings,
|
||||
error::{AppError, EmbeddingError},
|
||||
storage::{db::SurrealDbClient, types::system_settings::SystemSettings},
|
||||
utils::config::AppConfig,
|
||||
};
|
||||
|
||||
@@ -59,6 +62,152 @@ enum EmbeddingInner {
|
||||
/// progress logging while still amortising per-call lock/dispatch overhead.
|
||||
pub const RE_EMBED_BATCH_SIZE: usize = 128;
|
||||
|
||||
/// Default FastEmbed model code (`BGESmallENV15`).
///
/// [`resolve_fastembed_model_code`] falls back to this value when `config.fastembed_model` is
/// unset and the stored `system_settings.embedding_model` is not a supported FastEmbed code.
/// An invalid config override is reported as an error and does not fall back to this default.
|
||||
pub const DEFAULT_FASTEMBED_MODEL_CODE: &str = "Xenova/bge-small-en-v1.5";
|
||||
|
||||
/// A supported FastEmbed model for admin UI and documentation.
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct FastEmbedModelOption {
|
||||
/// HuggingFace-style `model_code` accepted by [`EmbeddingModel::from_str`].
|
||||
pub model_code: String,
|
||||
/// Fixed output dimension for this model.
|
||||
pub dimension: u32,
|
||||
/// Short human-readable description from fastembed metadata.
|
||||
pub description: String,
|
||||
}
|
||||
|
||||
/// Lists supported FastEmbed text embedding models (sorted by `model_code`).
|
||||
#[must_use]
|
||||
pub fn list_fastembed_embedding_models() -> Vec<FastEmbedModelOption> {
|
||||
let mut list: Vec<FastEmbedModelOption> = TextEmbedding::list_supported_models()
|
||||
.into_iter()
|
||||
.filter_map(|info| {
|
||||
let dimension = u32::try_from(info.dim).ok()?;
|
||||
Some(FastEmbedModelOption {
|
||||
model_code: info.model_code,
|
||||
dimension,
|
||||
description: info.description,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
list.sort_by(|left, right| left.model_code.cmp(&right.model_code));
|
||||
list
|
||||
}
|
||||
|
||||
/// Returns true when `code` is a supported FastEmbed `model_code` (HuggingFace-style id).
|
||||
#[must_use]
|
||||
pub fn is_valid_fastembed_model_code(code: &str) -> bool {
|
||||
!code.trim().is_empty() && EmbeddingModel::from_str(code.trim()).is_ok()
|
||||
}
|
||||
|
||||
/// Vector dimension for a supported FastEmbed `model_code`.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns [`EmbeddingError::UnknownModel`] when the code is not recognized.
|
||||
pub fn fastembed_model_dimension(code: &str) -> Result<u32, EmbeddingError> {
|
||||
let model = EmbeddingModel::from_str(code.trim())
|
||||
.map_err(|_| EmbeddingError::UnknownModel(unknown_fastembed_model_message(code)))?;
|
||||
let dim = EmbeddingModel::get_model_info(&model)
|
||||
.ok_or_else(|| {
|
||||
EmbeddingError::Config(format!("fastembed model metadata missing for {code}"))
|
||||
})?
|
||||
.dim;
|
||||
u32::try_from(dim).map_err(|_| {
|
||||
EmbeddingError::Config(format!("fastembed model dimension {dim} exceeds u32::MAX"))
|
||||
})
|
||||
}
|
||||
|
||||
/// Resolves the FastEmbed model code to load: config override, then DB, then default.
|
||||
///
|
||||
/// When `config.fastembed_model` is set it must be valid. When only the DB value is used and it
|
||||
/// is not a FastEmbed code (e.g. legacy `text-embedding-3-small`), returns the default model.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns [`EmbeddingError::UnknownModel`] if `config.fastembed_model` is set but invalid.
|
||||
pub fn resolve_fastembed_model_code(
|
||||
config: &AppConfig,
|
||||
settings_model: &str,
|
||||
) -> Result<String, EmbeddingError> {
|
||||
if let Some(code) = config.fastembed_model.as_deref() {
|
||||
let trimmed = code.trim();
|
||||
if trimmed.is_empty() {
|
||||
return Err(EmbeddingError::Config(
|
||||
"fastembed_model must not be empty when set".into(),
|
||||
));
|
||||
}
|
||||
EmbeddingModel::from_str(trimmed)
|
||||
.map_err(|_| EmbeddingError::UnknownModel(unknown_fastembed_model_message(trimmed)))?;
|
||||
return Ok(trimmed.to_owned());
|
||||
}
|
||||
|
||||
let trimmed = settings_model.trim();
|
||||
if is_valid_fastembed_model_code(trimmed) {
|
||||
return Ok(trimmed.to_owned());
|
||||
}
|
||||
|
||||
if !trimmed.is_empty() {
|
||||
warn!(
|
||||
stored_model = trimmed,
|
||||
default_model = DEFAULT_FASTEMBED_MODEL_CODE,
|
||||
"system_settings.embedding_model is not a FastEmbed model code; using default"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(DEFAULT_FASTEMBED_MODEL_CODE.to_owned())
|
||||
}
|
||||
|
||||
/// Persists a FastEmbed-compatible `embedding_model` and `embedding_dimensions` before startup
|
||||
/// when the active backend is FastEmbed and stored settings still carry OpenAI defaults.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns [`AppError`] if settings cannot be loaded, resolved, or updated.
|
||||
pub async fn align_fastembed_system_settings(
|
||||
db: &SurrealDbClient,
|
||||
config: &AppConfig,
|
||||
) -> Result<SystemSettings, AppError> {
|
||||
if config.embedding_backend != EmbeddingBackend::FastEmbed {
|
||||
return SystemSettings::get_current(db).await;
|
||||
}
|
||||
|
||||
let mut settings = SystemSettings::get_current(db).await?;
|
||||
let resolved = resolve_fastembed_model_code(config, &settings.embedding_model)?;
|
||||
let dimension = fastembed_model_dimension(&resolved)?;
|
||||
|
||||
if settings.embedding_model == resolved && settings.embedding_dimensions == dimension {
|
||||
return Ok(settings);
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
old_model = %settings.embedding_model,
|
||||
new_model = %resolved,
|
||||
old_dimensions = settings.embedding_dimensions,
|
||||
new_dimensions = dimension,
|
||||
"Aligning system settings with FastEmbed model"
|
||||
);
|
||||
settings.embedding_model = resolved;
|
||||
settings.embedding_dimensions = dimension;
|
||||
SystemSettings::update(db, settings).await
|
||||
}
|
||||
|
||||
fn unknown_fastembed_model_message(code: &str) -> String {
|
||||
let mut codes: Vec<String> = TextEmbedding::list_supported_models()
|
||||
.into_iter()
|
||||
.map(|info| info.model_code)
|
||||
.collect();
|
||||
codes.sort();
|
||||
let examples: Vec<&str> = codes.iter().take(6).map(String::as_str).collect();
|
||||
format!(
|
||||
"unknown FastEmbed model '{code}' (expected a HuggingFace model_code such as {}). \
|
||||
Set fastembed_model in config.yaml or update system_settings; \
|
||||
see docs/configuration.md ({count} models supported)",
|
||||
examples.join(", "),
|
||||
count = codes.len()
|
||||
)
|
||||
}
|
||||
|
||||
/// Default FastEmbed pool size.
|
||||
///
|
||||
/// Kept small on purpose: the ONNX runtime already uses intra-op threads per inference, so
|
||||
@@ -294,7 +443,8 @@ impl EmbeddingProvider {
|
||||
) -> Result<Self, EmbeddingError> {
|
||||
let pool_size = pool_size.max(1);
|
||||
let model_name = if let Some(code) = model_override {
|
||||
EmbeddingModel::from_str(&code).map_err(EmbeddingError::UnknownModel)?
|
||||
EmbeddingModel::from_str(code.trim())
|
||||
.map_err(|_| EmbeddingError::UnknownModel(unknown_fastembed_model_message(&code)))?
|
||||
} else {
|
||||
EmbeddingModel::default()
|
||||
};
|
||||
@@ -349,9 +499,10 @@ impl EmbeddingProvider {
|
||||
|
||||
/// Creates an embedding provider from persisted settings and bootstrap config.
|
||||
///
|
||||
/// Model name and dimensions come from [`SystemSettings`]. The active backend is taken
|
||||
/// from `config.embedding_backend` at startup; [`SystemSettings::sync_from_embedding_provider`]
|
||||
/// persists the resolved backend to the database.
|
||||
/// OpenAI/hashed model settings come from [`SystemSettings`]. FastEmbed uses
|
||||
/// [`resolve_fastembed_model_code`] (config `fastembed_model` overrides DB). The active
|
||||
/// backend is taken from `config.embedding_backend`; [`SystemSettings::sync_from_embedding_provider`]
|
||||
/// persists the resolved backend to the database after startup.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
@@ -375,7 +526,8 @@ impl EmbeddingProvider {
|
||||
let pool_size = config
|
||||
.embedding_pool_size
|
||||
.unwrap_or_else(default_embedding_pool_size);
|
||||
Self::new_fastembed(Some(settings.embedding_model.clone()), pool_size).await
|
||||
let model_code = resolve_fastembed_model_code(config, &settings.embedding_model)?;
|
||||
Self::new_fastembed(Some(model_code), pool_size).await
|
||||
}
|
||||
EmbeddingBackend::Hashed => {
|
||||
let dimension = usize::try_from(dimensions).map_err(|_| {
|
||||
@@ -433,7 +585,11 @@ fn bucket(token: &str, dimension: usize) -> usize {
|
||||
mod tests {
|
||||
#![allow(clippy::expect_used)]
|
||||
|
||||
use super::{EmbeddingBackend, ParseEmbeddingBackendError};
|
||||
use super::{
|
||||
align_fastembed_system_settings, fastembed_model_dimension, list_fastembed_embedding_models,
|
||||
resolve_fastembed_model_code, DEFAULT_FASTEMBED_MODEL_CODE, EmbeddingError,
|
||||
};
|
||||
use crate::utils::config::{AppConfig, EmbeddingBackend, ParseEmbeddingBackendError};
|
||||
use crate::storage::types::system_settings::SystemSettings;
|
||||
use serde_json::json;
|
||||
|
||||
@@ -483,6 +639,72 @@ mod tests {
|
||||
));
|
||||
}
|
||||
|
||||
/// The model catalog must contain the built-in default model code.
#[test]
fn list_fastembed_embedding_models_includes_default() {
    let has_default = list_fastembed_embedding_models()
        .iter()
        .any(|option| option.model_code == DEFAULT_FASTEMBED_MODEL_CODE);
    assert!(
        has_default,
        "catalog should include the default FastEmbed model"
    );
}
|
||||
|
||||
/// A valid `fastembed_model` in config takes precedence over whatever the DB stores.
#[test]
fn resolve_fastembed_model_prefers_config_over_db() {
    let override_code = "Xenova/bge-base-en-v1.5";
    let config = AppConfig {
        fastembed_model: Some(override_code.to_owned()),
        ..AppConfig::default()
    };

    let resolved = resolve_fastembed_model_code(&config, "text-embedding-3-small")
        .expect("config model");

    assert_eq!(resolved, override_code);
}
|
||||
|
||||
/// With no config override, a stored OpenAI model name resolves to the FastEmbed default.
#[test]
fn resolve_fastembed_model_falls_back_from_openai_default() {
    let resolved = resolve_fastembed_model_code(&AppConfig::default(), "text-embedding-3-small")
        .expect("default model");

    assert_eq!(resolved, DEFAULT_FASTEMBED_MODEL_CODE);
}
|
||||
|
||||
/// An unrecognized config override is an error even when the stored model is valid.
#[test]
fn resolve_fastembed_model_rejects_invalid_config_override() {
    let config = AppConfig {
        fastembed_model: Some("not-a-real-model".into()),
        ..AppConfig::default()
    };

    let outcome = resolve_fastembed_model_code(&config, "Xenova/bge-small-en-v1.5");
    let err = outcome.expect_err("invalid config model");

    assert!(matches!(err, EmbeddingError::UnknownModel(_)));
}
|
||||
|
||||
/// The default model reports a 384-wide embedding vector.
#[test]
fn fastembed_model_dimension_matches_model_metadata() {
    let reported = fastembed_model_dimension(DEFAULT_FASTEMBED_MODEL_CODE).expect("dim");

    assert_eq!(reported, 384);
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn align_fastembed_system_settings_replaces_openai_default() -> anyhow::Result<()> {
|
||||
use crate::storage::db::SurrealDbClient;
|
||||
use uuid::Uuid;
|
||||
|
||||
let db = SurrealDbClient::memory("align_fe", &Uuid::new_v4().to_string()).await?;
|
||||
db.apply_migrations().await?;
|
||||
|
||||
let config = AppConfig {
|
||||
embedding_backend: EmbeddingBackend::FastEmbed,
|
||||
..AppConfig::default()
|
||||
};
|
||||
|
||||
let settings = align_fastembed_system_settings(&db, &config).await?;
|
||||
assert_eq!(settings.embedding_model, DEFAULT_FASTEMBED_MODEL_CODE);
|
||||
assert_eq!(settings.embedding_dimensions, 384);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn system_settings_deserializes_embedding_backend_field() {
|
||||
let value = json!({
|
||||
|
||||
Reference in New Issue
Block a user