mirror of
https://github.com/perstarkse/minne.git
synced 2026-06-25 03:16:26 +02:00
feat: configure FastEmbed model in config and admin, with restart to apply
Expose fastembed_model in config and a model dropdown on Admin → Models. Persist dimension from the chosen model, require restart to load it, and align legacy OpenAI default settings so fresh local-embedding installs start cleanly.
This commit is contained in:
@@ -15,7 +15,13 @@ use common::{
|
||||
},
|
||||
system_settings::{SystemSettings, SystemSettingsPatch},
|
||||
},
|
||||
utils::embedding::EmbeddingBackend,
|
||||
utils::{
|
||||
config::AppConfig,
|
||||
embedding::{
|
||||
fastembed_model_dimension, is_valid_fastembed_model_code, list_fastembed_embedding_models,
|
||||
EmbeddingBackend, FastEmbedModelOption,
|
||||
},
|
||||
},
|
||||
};
|
||||
use tracing::info;
|
||||
|
||||
@@ -32,6 +38,9 @@ pub struct AdminPanelData {
|
||||
default_query_prompt: String,
|
||||
default_image_prompt: String,
|
||||
available_models: Option<ListModelResponse>,
|
||||
fastembed_models: Option<Vec<FastEmbedModelOption>>,
|
||||
fastembed_model_locked_by_config: bool,
|
||||
effective_embedding_backend: String,
|
||||
current_section: AdminSection,
|
||||
}
|
||||
|
||||
@@ -70,18 +79,30 @@ pub async fn show_admin_panel(
|
||||
(None, None)
|
||||
};
|
||||
|
||||
let available_models = if section == AdminSection::Models {
|
||||
Some(
|
||||
state
|
||||
.openai_client
|
||||
.models()
|
||||
.list()
|
||||
.await
|
||||
.map_err(|e| AppError::InternalError(e.to_string()))?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let (available_models, fastembed_models, fastembed_model_locked_by_config) =
|
||||
if section == AdminSection::Models {
|
||||
let available_models = Some(
|
||||
state
|
||||
.openai_client
|
||||
.models()
|
||||
.list()
|
||||
.await
|
||||
.map_err(|e| AppError::InternalError(e.to_string()))?,
|
||||
);
|
||||
let fastembed_models = is_fastembed_admin_context(&settings, &state.config)
|
||||
.then(list_fastembed_embedding_models);
|
||||
let fastembed_model_locked_by_config = state.config.fastembed_model.is_some();
|
||||
(
|
||||
available_models,
|
||||
fastembed_models,
|
||||
fastembed_model_locked_by_config,
|
||||
)
|
||||
} else {
|
||||
(None, None, false)
|
||||
};
|
||||
|
||||
let effective_backend =
|
||||
effective_embedding_backend(&settings, &state.config).as_str().to_string();
|
||||
|
||||
Ok(TemplateResponse::new_template(
|
||||
"admin/base.html",
|
||||
@@ -89,6 +110,9 @@ pub async fn show_admin_panel(
|
||||
settings,
|
||||
analytics,
|
||||
available_models,
|
||||
fastembed_models,
|
||||
fastembed_model_locked_by_config,
|
||||
effective_embedding_backend: effective_backend,
|
||||
users,
|
||||
default_query_prompt: DEFAULT_QUERY_SYSTEM_PROMPT.to_string(),
|
||||
default_image_prompt: DEFAULT_IMAGE_PROCESSING_PROMPT.to_string(),
|
||||
@@ -150,6 +174,100 @@ pub struct ModelSettingsInput {
|
||||
pub struct ModelSettingsData {
|
||||
settings: SystemSettings,
|
||||
available_models: ListModelResponse,
|
||||
fastembed_models: Option<Vec<FastEmbedModelOption>>,
|
||||
fastembed_model_locked_by_config: bool,
|
||||
effective_embedding_backend: String,
|
||||
}
|
||||
|
||||
struct EmbeddingSettingsPlan {
|
||||
embedding_model: String,
|
||||
embedding_dimensions: u32,
|
||||
reembedding_needed: bool,
|
||||
restart_needed: bool,
|
||||
}
|
||||
|
||||
fn effective_embedding_backend(settings: &SystemSettings, config: &AppConfig) -> EmbeddingBackend {
|
||||
settings.embedding_backend.unwrap_or(config.embedding_backend)
|
||||
}
|
||||
|
||||
fn is_fastembed_admin_context(settings: &SystemSettings, config: &AppConfig) -> bool {
|
||||
effective_embedding_backend(settings, config) == EmbeddingBackend::FastEmbed
|
||||
}
|
||||
|
||||
fn plan_embedding_settings_update(
|
||||
current: &SystemSettings,
|
||||
input: &ModelSettingsInput,
|
||||
config: &AppConfig,
|
||||
) -> Result<EmbeddingSettingsPlan, AppError> {
|
||||
match effective_embedding_backend(current, config) {
|
||||
EmbeddingBackend::OpenAI => {
|
||||
let reembedding_needed = input
|
||||
.embedding_dimensions
|
||||
.is_some_and(|new_dims| new_dims != current.embedding_dimensions);
|
||||
let embedding_model = input
|
||||
.embedding_model
|
||||
.clone()
|
||||
.unwrap_or_else(|| current.embedding_model.clone());
|
||||
let embedding_dimensions = input
|
||||
.embedding_dimensions
|
||||
.unwrap_or(current.embedding_dimensions);
|
||||
Ok(EmbeddingSettingsPlan {
|
||||
embedding_model,
|
||||
embedding_dimensions,
|
||||
reembedding_needed,
|
||||
restart_needed: reembedding_needed,
|
||||
})
|
||||
}
|
||||
EmbeddingBackend::FastEmbed => {
|
||||
if config.fastembed_model.is_some() {
|
||||
return Ok(EmbeddingSettingsPlan {
|
||||
embedding_model: current.embedding_model.clone(),
|
||||
embedding_dimensions: current.embedding_dimensions,
|
||||
reembedding_needed: false,
|
||||
restart_needed: false,
|
||||
});
|
||||
}
|
||||
|
||||
let embedding_model = input
|
||||
.embedding_model
|
||||
.as_deref()
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
.map(ToOwned::to_owned)
|
||||
.unwrap_or_else(|| current.embedding_model.clone());
|
||||
|
||||
if !is_valid_fastembed_model_code(&embedding_model) {
|
||||
return Err(AppError::Validation(format!(
|
||||
"Unknown FastEmbed model '{embedding_model}'. Choose a model from the list."
|
||||
)));
|
||||
}
|
||||
|
||||
let embedding_dimensions = fastembed_model_dimension(&embedding_model)
|
||||
.map_err(AppError::from)?;
|
||||
let reembedding_needed = embedding_dimensions != current.embedding_dimensions;
|
||||
let restart_needed =
|
||||
embedding_model != current.embedding_model || reembedding_needed;
|
||||
|
||||
Ok(EmbeddingSettingsPlan {
|
||||
embedding_model,
|
||||
embedding_dimensions,
|
||||
reembedding_needed,
|
||||
restart_needed,
|
||||
})
|
||||
}
|
||||
EmbeddingBackend::Hashed => {
|
||||
info!(
|
||||
backend = ?current.embedding_backend,
|
||||
"Embedding model/dimensions for hashed backend are controlled by config"
|
||||
);
|
||||
Ok(EmbeddingSettingsPlan {
|
||||
embedding_model: current.embedding_model.clone(),
|
||||
embedding_dimensions: current.embedding_dimensions,
|
||||
reembedding_needed: false,
|
||||
restart_needed: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn update_model_settings(
|
||||
@@ -157,63 +275,32 @@ pub async fn update_model_settings(
|
||||
Form(input): Form<ModelSettingsInput>,
|
||||
) -> TemplateResult {
|
||||
let current_settings = SystemSettings::get_current(&state.db).await?;
|
||||
|
||||
// Check if using FastEmbed - if so, embedding model/dimensions cannot be changed via UI
|
||||
let uses_local_embeddings = current_settings.embedding_backend.is_some_and(
|
||||
|backend| matches!(backend, EmbeddingBackend::FastEmbed | EmbeddingBackend::Hashed),
|
||||
);
|
||||
|
||||
// For local embeddings, ignore any embedding model/dimension changes from the form
|
||||
let (final_embedding_model, final_embedding_dimensions, reembedding_needed) =
|
||||
if uses_local_embeddings {
|
||||
// Keep current values - they're controlled by config, not the admin UI
|
||||
info!(
|
||||
backend = ?current_settings.embedding_backend,
|
||||
"Embedding model/dimensions controlled by config, ignoring form input"
|
||||
);
|
||||
(
|
||||
current_settings.embedding_model.clone(),
|
||||
current_settings.embedding_dimensions,
|
||||
false,
|
||||
)
|
||||
} else {
|
||||
// OpenAI backend - allow changes from form
|
||||
let reembedding_needed = input
|
||||
.embedding_dimensions
|
||||
.is_some_and(|new_dims| new_dims != current_settings.embedding_dimensions);
|
||||
(
|
||||
input
|
||||
.embedding_model
|
||||
.unwrap_or_else(|| current_settings.embedding_model.clone()),
|
||||
input
|
||||
.embedding_dimensions
|
||||
.unwrap_or(current_settings.embedding_dimensions),
|
||||
reembedding_needed,
|
||||
)
|
||||
};
|
||||
let embedding_plan =
|
||||
plan_embedding_settings_update(¤t_settings, &input, &state.config)?;
|
||||
|
||||
let new_settings = SystemSettingsPatch {
|
||||
query_model: Some(input.query_model),
|
||||
processing_model: Some(input.processing_model),
|
||||
image_processing_model: Some(input.image_processing_model),
|
||||
voice_processing_model: Some(input.voice_processing_model),
|
||||
embedding_model: Some(final_embedding_model),
|
||||
embedding_dimensions: Some(final_embedding_dimensions),
|
||||
embedding_model: Some(embedding_plan.embedding_model),
|
||||
embedding_dimensions: Some(embedding_plan.embedding_dimensions),
|
||||
..Default::default()
|
||||
}
|
||||
.apply(&state.db)
|
||||
.await?;
|
||||
|
||||
if reembedding_needed {
|
||||
if embedding_plan.reembedding_needed {
|
||||
// Re-embedding is owned by startup (the worker/combined binary), not the admin request.
|
||||
// Doing it inline here would leave the live, startup-built embedding provider embedding
|
||||
// queries at the old dimension while stored vectors move to the new one — broken retrieval
|
||||
// until restart. Persisting the new settings is enough: on the next restart the maintainer
|
||||
// detects the index/dimension mismatch and re-embeds before rebuilding indexes.
|
||||
info!(
|
||||
new_dimensions = new_settings.embedding_dimensions,
|
||||
"Embedding dimensions changed; restart the worker/server to re-embed and apply"
|
||||
);
|
||||
} else if embedding_plan.restart_needed {
|
||||
info!(
|
||||
new_model = %new_settings.embedding_model,
|
||||
"Embedding model changed; restart the worker/server to apply"
|
||||
);
|
||||
}
|
||||
|
||||
let available_models = state
|
||||
@@ -223,16 +310,98 @@ pub async fn update_model_settings(
|
||||
.await
|
||||
.map_err(|_e| AppError::InternalError("Failed to get models".to_string()))?;
|
||||
|
||||
let effective_backend =
|
||||
effective_embedding_backend(&new_settings, &state.config).as_str().to_string();
|
||||
let show_fastembed_models =
|
||||
is_fastembed_admin_context(&new_settings, &state.config).then(list_fastembed_embedding_models);
|
||||
|
||||
Ok(TemplateResponse::new_partial(
|
||||
"admin/sections/models.html",
|
||||
"model_settings_form",
|
||||
ModelSettingsData {
|
||||
settings: new_settings,
|
||||
available_models,
|
||||
fastembed_models: show_fastembed_models,
|
||||
fastembed_model_locked_by_config: state.config.fastembed_model.is_some(),
|
||||
effective_embedding_backend: effective_backend,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::expect_used)]
|
||||
|
||||
use super::*;
|
||||
use common::utils::config::AppConfig;
|
||||
|
||||
fn openai_settings() -> SystemSettings {
|
||||
SystemSettings {
|
||||
id: "current".into(),
|
||||
registrations_enabled: true,
|
||||
require_email_verification: false,
|
||||
query_model: "gpt-4o-mini".into(),
|
||||
processing_model: "gpt-4o-mini".into(),
|
||||
embedding_model: "text-embedding-3-small".into(),
|
||||
embedding_dimensions: 1536,
|
||||
embedding_backend: Some(EmbeddingBackend::OpenAI),
|
||||
query_system_prompt: "q".into(),
|
||||
ingestion_system_prompt: "i".into(),
|
||||
image_processing_model: "gpt-4o-mini".into(),
|
||||
image_processing_prompt: "p".into(),
|
||||
voice_processing_model: "whisper-1".into(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn plan_fastembed_update_sets_dimensions_from_model_metadata() {
|
||||
let current = SystemSettings {
|
||||
embedding_backend: Some(EmbeddingBackend::FastEmbed),
|
||||
embedding_model: "Xenova/bge-small-en-v1.5".into(),
|
||||
embedding_dimensions: 384,
|
||||
..openai_settings()
|
||||
};
|
||||
let input = ModelSettingsInput {
|
||||
query_model: current.query_model.clone(),
|
||||
processing_model: current.processing_model.clone(),
|
||||
image_processing_model: current.image_processing_model.clone(),
|
||||
voice_processing_model: current.voice_processing_model.clone(),
|
||||
embedding_model: Some("Xenova/bge-base-en-v1.5".into()),
|
||||
embedding_dimensions: None,
|
||||
};
|
||||
let plan = plan_embedding_settings_update(¤t, &input, &AppConfig::default())
|
||||
.expect("plan");
|
||||
assert_eq!(plan.embedding_model, "Xenova/bge-base-en-v1.5");
|
||||
assert_eq!(plan.embedding_dimensions, 768);
|
||||
assert!(plan.reembedding_needed);
|
||||
assert!(plan.restart_needed);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn plan_fastembed_ignores_form_when_config_overrides_model() {
|
||||
let current = SystemSettings {
|
||||
embedding_backend: Some(EmbeddingBackend::FastEmbed),
|
||||
..openai_settings()
|
||||
};
|
||||
let input = ModelSettingsInput {
|
||||
query_model: current.query_model.clone(),
|
||||
processing_model: current.processing_model.clone(),
|
||||
image_processing_model: current.image_processing_model.clone(),
|
||||
voice_processing_model: current.voice_processing_model.clone(),
|
||||
embedding_model: Some("Xenova/bge-large-en-v1.5".into()),
|
||||
embedding_dimensions: None,
|
||||
};
|
||||
let config = AppConfig {
|
||||
embedding_backend: EmbeddingBackend::FastEmbed,
|
||||
fastembed_model: Some("Xenova/bge-small-en-v1.5".into()),
|
||||
..AppConfig::default()
|
||||
};
|
||||
let plan = plan_embedding_settings_update(¤t, &input, &config).expect("plan");
|
||||
assert_eq!(plan.embedding_model, current.embedding_model);
|
||||
assert!(!plan.restart_needed);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct SystemPromptEditData {
|
||||
settings: SystemSettings,
|
||||
|
||||
Reference in New Issue
Block a user