feat: configure FastEmbed model in config and admin, with restart to apply

Expose fastembed_model in config and a model dropdown on Admin → Models.
Persist dimension from the chosen model, require restart to load it, and
align legacy OpenAI default settings so fresh local-embedding installs
start cleanly.
This commit is contained in:
Per Stark
2026-06-04 21:48:12 +02:00
parent 15c9f18f6e
commit 4e20da538d
10 changed files with 735 additions and 82 deletions
+15
View File
@@ -2027,6 +2027,12 @@
.border-base-200 {
border-color: var(--color-base-200);
}
/* Border colour utility: base-content colour at 10% strength. */
.border-base-content\/10 {
/* Fallback when color-mix() is unavailable: the unmixed base-content colour. */
border-color: var(--color-base-content);
@supports (color: color-mix(in lab, red, red)) {
/* With color-mix() support, blend 10% base-content with transparent in oklab. */
border-color: color-mix(in oklab, var(--color-base-content) 10%, transparent);
}
}
.border-error {
border-color: var(--color-error);
}
@@ -2048,6 +2054,12 @@
.bg-base-200 {
background-color: var(--color-base-200);
}
/* Background utility: base-200 colour at 40% strength. */
.bg-base-200\/40 {
/* Fallback when color-mix() is unavailable: the unmixed base-200 colour. */
background-color: var(--color-base-200);
@supports (color: color-mix(in lab, red, red)) {
/* With color-mix() support, blend 40% base-200 with transparent in oklab. */
background-color: color-mix(in oklab, var(--color-base-200) 40%, transparent);
}
}
.bg-transparent {
background-color: transparent;
}
@@ -2334,6 +2346,9 @@
.opacity-80 {
opacity: 80%;
}
/* Opacity utility: renders the element at 90% opacity. */
.opacity-90 {
opacity: 90%;
}
.opacity-100 {
opacity: 100%;
}
+223 -54
View File
@@ -15,7 +15,13 @@ use common::{
},
system_settings::{SystemSettings, SystemSettingsPatch},
},
utils::embedding::EmbeddingBackend,
utils::{
config::AppConfig,
embedding::{
fastembed_model_dimension, is_valid_fastembed_model_code, list_fastembed_embedding_models,
EmbeddingBackend, FastEmbedModelOption,
},
},
};
use tracing::info;
@@ -32,6 +38,9 @@ pub struct AdminPanelData {
default_query_prompt: String,
default_image_prompt: String,
available_models: Option<ListModelResponse>,
fastembed_models: Option<Vec<FastEmbedModelOption>>,
fastembed_model_locked_by_config: bool,
effective_embedding_backend: String,
current_section: AdminSection,
}
@@ -70,18 +79,30 @@ pub async fn show_admin_panel(
(None, None)
};
let available_models = if section == AdminSection::Models {
Some(
state
.openai_client
.models()
.list()
.await
.map_err(|e| AppError::InternalError(e.to_string()))?,
)
} else {
None
};
let (available_models, fastembed_models, fastembed_model_locked_by_config) =
if section == AdminSection::Models {
let available_models = Some(
state
.openai_client
.models()
.list()
.await
.map_err(|e| AppError::InternalError(e.to_string()))?,
);
let fastembed_models = is_fastembed_admin_context(&settings, &state.config)
.then(list_fastembed_embedding_models);
let fastembed_model_locked_by_config = state.config.fastembed_model.is_some();
(
available_models,
fastembed_models,
fastembed_model_locked_by_config,
)
} else {
(None, None, false)
};
let effective_backend =
effective_embedding_backend(&settings, &state.config).as_str().to_string();
Ok(TemplateResponse::new_template(
"admin/base.html",
@@ -89,6 +110,9 @@ pub async fn show_admin_panel(
settings,
analytics,
available_models,
fastembed_models,
fastembed_model_locked_by_config,
effective_embedding_backend: effective_backend,
users,
default_query_prompt: DEFAULT_QUERY_SYSTEM_PROMPT.to_string(),
default_image_prompt: DEFAULT_IMAGE_PROCESSING_PROMPT.to_string(),
@@ -150,6 +174,100 @@ pub struct ModelSettingsInput {
/// Template context for the `model_settings_form` partial of `admin/sections/models.html`.
pub struct ModelSettingsData {
/// System settings as they stand after the update was applied.
settings: SystemSettings,
/// Models reported by the OpenAI client's model listing.
available_models: ListModelResponse,
/// Selectable FastEmbed models; `Some` only when the effective backend is FastEmbed.
fastembed_models: Option<Vec<FastEmbedModelOption>>,
/// `true` when `fastembed_model` is set in the application config.
fastembed_model_locked_by_config: bool,
/// String form of the effective embedding backend (persisted setting, else config value).
effective_embedding_backend: String,
}
/// Outcome of planning an embedding settings change: the values to persist plus the
/// follow-up actions the change implies.
struct EmbeddingSettingsPlan {
/// Embedding model identifier to persist.
embedding_model: String,
/// Embedding dimensions to persist.
embedding_dimensions: u32,
/// `true` when the planned dimensions differ from the currently stored dimensions.
reembedding_needed: bool,
/// `true` when the change only takes effect after a restart (a dimension change, or for
/// FastEmbed also a model change).
restart_needed: bool,
}
/// Resolves the embedding backend in effect: the backend stored in the system settings
/// when present, otherwise the backend from the application config.
fn effective_embedding_backend(settings: &SystemSettings, config: &AppConfig) -> EmbeddingBackend {
    match settings.embedding_backend {
        Some(persisted) => persisted,
        None => config.embedding_backend,
    }
}
/// Reports whether the effective embedding backend is FastEmbed, i.e. whether the admin
/// UI should be working with the FastEmbed model list.
fn is_fastembed_admin_context(settings: &SystemSettings, config: &AppConfig) -> bool {
    let backend = effective_embedding_backend(settings, config);
    matches!(backend, EmbeddingBackend::FastEmbed)
}
/// Builds a plan that keeps the persisted embedding model and dimensions as they are.
///
/// Shared by the branches in which the admin form has no say over embedding settings.
fn unchanged_embedding_plan(current: &SystemSettings) -> EmbeddingSettingsPlan {
    EmbeddingSettingsPlan {
        embedding_model: current.embedding_model.clone(),
        embedding_dimensions: current.embedding_dimensions,
        reembedding_needed: false,
        restart_needed: false,
    }
}

/// Works out which embedding model and dimensions to persist for a model-settings form
/// submission, based on the effective embedding backend.
///
/// * **OpenAI** – model and dimensions are taken from the form, falling back to the
///   current values; a dimension change requires re-embedding and a restart.
/// * **FastEmbed** – when `fastembed_model` is set in the config the form is ignored;
///   otherwise the (trimmed, non-empty) model from the form is validated and the
///   dimensions are derived from the model, never from the form.
/// * **Hashed** – the form is ignored and the current values are kept.
///
/// # Errors
///
/// Returns `AppError::Validation` when the chosen FastEmbed model code is not recognised,
/// and propagates any error from `fastembed_model_dimension`.
fn plan_embedding_settings_update(
    current: &SystemSettings,
    input: &ModelSettingsInput,
    config: &AppConfig,
) -> Result<EmbeddingSettingsPlan, AppError> {
    let backend = effective_embedding_backend(current, config);

    match backend {
        EmbeddingBackend::OpenAI => {
            let embedding_dimensions = input
                .embedding_dimensions
                .unwrap_or(current.embedding_dimensions);
            // Only a dimension change invalidates the stored vectors.
            let reembedding_needed = embedding_dimensions != current.embedding_dimensions;
            let embedding_model = input
                .embedding_model
                .as_deref()
                .unwrap_or(current.embedding_model.as_str())
                .to_owned();

            Ok(EmbeddingSettingsPlan {
                embedding_model,
                embedding_dimensions,
                reembedding_needed,
                restart_needed: reembedding_needed,
            })
        }
        EmbeddingBackend::FastEmbed => {
            // A model pinned in the config takes precedence over the admin form.
            if config.fastembed_model.is_some() {
                return Ok(unchanged_embedding_plan(current));
            }

            // Blank or whitespace-only submissions fall back to the current model.
            let embedding_model = input
                .embedding_model
                .as_deref()
                .map(str::trim)
                .filter(|value| !value.is_empty())
                .unwrap_or(current.embedding_model.as_str())
                .to_owned();

            if !is_valid_fastembed_model_code(&embedding_model) {
                return Err(AppError::Validation(format!(
                    "Unknown FastEmbed model '{embedding_model}'. Choose a model from the list."
                )));
            }

            // Dimensions come from the model itself, not from the form.
            let embedding_dimensions = fastembed_model_dimension(&embedding_model)?;
            let reembedding_needed = embedding_dimensions != current.embedding_dimensions;
            // A model swap needs a restart even when the dimensions stay the same.
            let restart_needed =
                embedding_model != current.embedding_model || reembedding_needed;

            Ok(EmbeddingSettingsPlan {
                embedding_model,
                embedding_dimensions,
                reembedding_needed,
                restart_needed,
            })
        }
        EmbeddingBackend::Hashed => {
            // Log the backend that was actually matched; the persisted setting may be
            // `None` when the backend comes from the config.
            info!(
                backend = ?backend,
                "Embedding model/dimensions for hashed backend are controlled by config"
            );
            Ok(unchanged_embedding_plan(current))
        }
    }
}
pub async fn update_model_settings(
@@ -157,63 +275,32 @@ pub async fn update_model_settings(
Form(input): Form<ModelSettingsInput>,
) -> TemplateResult {
let current_settings = SystemSettings::get_current(&state.db).await?;
// Check if using FastEmbed - if so, embedding model/dimensions cannot be changed via UI
let uses_local_embeddings = current_settings.embedding_backend.is_some_and(
|backend| matches!(backend, EmbeddingBackend::FastEmbed | EmbeddingBackend::Hashed),
);
// For local embeddings, ignore any embedding model/dimension changes from the form
let (final_embedding_model, final_embedding_dimensions, reembedding_needed) =
if uses_local_embeddings {
// Keep current values - they're controlled by config, not the admin UI
info!(
backend = ?current_settings.embedding_backend,
"Embedding model/dimensions controlled by config, ignoring form input"
);
(
current_settings.embedding_model.clone(),
current_settings.embedding_dimensions,
false,
)
} else {
// OpenAI backend - allow changes from form
let reembedding_needed = input
.embedding_dimensions
.is_some_and(|new_dims| new_dims != current_settings.embedding_dimensions);
(
input
.embedding_model
.unwrap_or_else(|| current_settings.embedding_model.clone()),
input
.embedding_dimensions
.unwrap_or(current_settings.embedding_dimensions),
reembedding_needed,
)
};
let embedding_plan =
plan_embedding_settings_update(&current_settings, &input, &state.config)?;
let new_settings = SystemSettingsPatch {
query_model: Some(input.query_model),
processing_model: Some(input.processing_model),
image_processing_model: Some(input.image_processing_model),
voice_processing_model: Some(input.voice_processing_model),
embedding_model: Some(final_embedding_model),
embedding_dimensions: Some(final_embedding_dimensions),
embedding_model: Some(embedding_plan.embedding_model),
embedding_dimensions: Some(embedding_plan.embedding_dimensions),
..Default::default()
}
.apply(&state.db)
.await?;
if reembedding_needed {
if embedding_plan.reembedding_needed {
// Re-embedding is owned by startup (the worker/combined binary), not the admin request.
// Doing it inline here would leave the live, startup-built embedding provider embedding
// queries at the old dimension while stored vectors move to the new one — broken retrieval
// until restart. Persisting the new settings is enough: on the next restart the maintainer
// detects the index/dimension mismatch and re-embeds before rebuilding indexes.
info!(
new_dimensions = new_settings.embedding_dimensions,
"Embedding dimensions changed; restart the worker/server to re-embed and apply"
);
} else if embedding_plan.restart_needed {
info!(
new_model = %new_settings.embedding_model,
"Embedding model changed; restart the worker/server to apply"
);
}
let available_models = state
@@ -223,16 +310,98 @@ pub async fn update_model_settings(
.await
.map_err(|_e| AppError::InternalError("Failed to get models".to_string()))?;
let effective_backend =
effective_embedding_backend(&new_settings, &state.config).as_str().to_string();
let show_fastembed_models =
is_fastembed_admin_context(&new_settings, &state.config).then(list_fastembed_embedding_models);
Ok(TemplateResponse::new_partial(
"admin/sections/models.html",
"model_settings_form",
ModelSettingsData {
settings: new_settings,
available_models,
fastembed_models: show_fastembed_models,
fastembed_model_locked_by_config: state.config.fastembed_model.is_some(),
effective_embedding_backend: effective_backend,
},
))
}
#[cfg(test)]
mod tests {
    // `expect` is fine here: a panic with a message is the desired failure mode in tests.
    #![allow(clippy::expect_used)]

    use super::*;
    use common::utils::config::AppConfig;

    /// Baseline settings for an OpenAI-backed install; tests override individual fields.
    fn openai_settings() -> SystemSettings {
        SystemSettings {
            id: "current".into(),
            registrations_enabled: true,
            require_email_verification: false,
            query_model: "gpt-4o-mini".into(),
            processing_model: "gpt-4o-mini".into(),
            embedding_model: "text-embedding-3-small".into(),
            embedding_dimensions: 1536,
            embedding_backend: Some(EmbeddingBackend::OpenAI),
            query_system_prompt: "q".into(),
            ingestion_system_prompt: "i".into(),
            image_processing_model: "gpt-4o-mini".into(),
            image_processing_prompt: "p".into(),
            voice_processing_model: "whisper-1".into(),
        }
    }

    /// Builds a form submission that keeps every non-embedding model from `current` and
    /// selects `embedding_model` without submitting explicit dimensions.
    fn form_selecting_model(current: &SystemSettings, embedding_model: &str) -> ModelSettingsInput {
        ModelSettingsInput {
            query_model: current.query_model.clone(),
            processing_model: current.processing_model.clone(),
            image_processing_model: current.image_processing_model.clone(),
            voice_processing_model: current.voice_processing_model.clone(),
            embedding_model: Some(embedding_model.to_owned()),
            embedding_dimensions: None,
        }
    }

    #[test]
    fn plan_fastembed_update_sets_dimensions_from_model_metadata() {
        let current = SystemSettings {
            embedding_backend: Some(EmbeddingBackend::FastEmbed),
            embedding_model: "Xenova/bge-small-en-v1.5".into(),
            embedding_dimensions: 384,
            ..openai_settings()
        };
        let input = form_selecting_model(&current, "Xenova/bge-base-en-v1.5");

        let plan = plan_embedding_settings_update(&current, &input, &AppConfig::default())
            .expect("plan");

        assert_eq!(plan.embedding_model, "Xenova/bge-base-en-v1.5");
        assert_eq!(plan.embedding_dimensions, 768);
        assert!(plan.reembedding_needed);
        assert!(plan.restart_needed);
    }

    #[test]
    fn plan_fastembed_ignores_form_when_config_overrides_model() {
        let current = SystemSettings {
            embedding_backend: Some(EmbeddingBackend::FastEmbed),
            ..openai_settings()
        };
        let input = form_selecting_model(&current, "Xenova/bge-large-en-v1.5");
        let config = AppConfig {
            embedding_backend: EmbeddingBackend::FastEmbed,
            fastembed_model: Some("Xenova/bge-small-en-v1.5".into()),
            ..AppConfig::default()
        };

        let plan = plan_embedding_settings_update(&current, &input, &config).expect("plan");

        // With the model pinned in config, the whole plan must mirror the current settings.
        assert_eq!(plan.embedding_model, current.embedding_model);
        assert_eq!(plan.embedding_dimensions, current.embedding_dimensions);
        assert!(!plan.reembedding_needed);
        assert!(!plan.restart_needed);
    }
}
#[derive(Serialize)]
pub struct SystemPromptEditData {
settings: SystemSettings,
@@ -4,7 +4,8 @@
<div class="text-sm uppercase tracking-wide opacity-60 mb-1">AI Models</div>
<h2 class="text-lg font-semibold">Model configuration</h2>
<p class="text-xs opacity-70 max-w-2xl">
Choose which models power conversational search, ingestion analysis, and embeddings. Adjusting embeddings may trigger a full reprocess.
Choose which models power conversational search, ingestion analysis, and embeddings.
Embedding dimension changes apply after you restart the worker or server.
</p>
</div>
<a
@@ -70,7 +71,30 @@
<div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
<div>
<div class="text-sm opacity-80 mb-1">Embedding Model</div>
{% if settings.embedding_backend == "fastembed" or settings.embedding_backend == "hashed" %}
{% if effective_embedding_backend == "fastembed" %}
{% if fastembed_model_locked_by_config %}
<input
type="text"
class="nb-input w-full opacity-60 cursor-not-allowed"
value="{{ settings.embedding_model }}"
disabled
/>
<p class="text-xs text-info mt-1">
Overridden by <span class="font-mono">fastembed_model</span> in config.yaml at startup. Remove that setting to manage the model here.
</p>
{% else %}
<select name="embedding_model" id="fastembed_model_select" class="nb-select w-full">
{% for fe in fastembed_models %}
<option value="{{ fe.model_code }}" {% if settings.embedding_model == fe.model_code %}selected{% endif %}>
{{ fe.model_code }} ({{ fe.dimension }} dims)
</option>
{% endfor %}
</select>
<p class="text-xs opacity-70 mt-1">
Save, then restart the worker or server to load the new model. First run may download weights.
</p>
{% endif %}
{% elif effective_embedding_backend == "hashed" %}
<input
type="text"
name="embedding_model"
@@ -78,11 +102,8 @@
value="{{ settings.embedding_model }}"
disabled
/>
<p class="text-xs opacity-70 mt-1">
Model: <span class="font-mono">{{ settings.embedding_model }} ({{ settings.embedding_dimensions }} dims)</span>
</p>
<p class="text-xs text-info mt-1">
Info: Embedding model is controlled by config when using <span class="font-mono">{{ settings.embedding_backend }}</span> backend.
Hashed embeddings use <span class="font-mono">embedding_dimensions</span> from config, not the admin UI.
</p>
{% else %}
<select name="embedding_model" class="nb-select w-full">
@@ -96,7 +117,18 @@
<div>
<div class="text-sm opacity-80 mb-1" for="embedding_dimensions">Embedding Dimensions</div>
{% if settings.embedding_backend == "fastembed" or settings.embedding_backend == "hashed" %}
{% if effective_embedding_backend == "fastembed" %}
<input
type="number"
id="embedding_dimensions"
class="nb-input w-full opacity-60 cursor-not-allowed"
value="{{ settings.embedding_dimensions }}"
disabled
/>
<p class="text-xs opacity-70 mt-1">
Fixed by the selected FastEmbed model. A dimension change triggers a full re-embed after restart.
</p>
{% elif effective_embedding_backend == "hashed" %}
<input
type="number"
id="embedding_dimensions"
@@ -106,8 +138,7 @@
disabled
/>
<p class="text-xs text-info mt-1">
Info: Dimensions are fixed for <span class="font-mono">{{ settings.embedding_backend }}</span> backend. Set
<span class="font-mono">EMBEDDING_BACKEND=openai</span> to use OpenAI embeddings.
Set <span class="font-mono">EMBEDDING_BACKEND=openai</span> for OpenAI embeddings, or configure hashed dims in config.
</p>
{% else %}
<input
@@ -119,15 +150,50 @@
required
min="1"
/>
<p class="text-xs opacity-70 mt-1">Changing dimensions will trigger a background re-embedding.</p>
<p class="text-xs opacity-70 mt-1">
Saving a new dimension updates settings only. Restart the worker (or combined app) to re-embed stored data and rebuild indexes.
</p>
{% endif %}
</div>
</div>
{% if settings.embedding_backend != "fastembed" and settings.embedding_backend != "hashed" %}
{% if effective_embedding_backend == "fastembed" and not fastembed_model_locked_by_config %}
<div class="nb-panel p-3 bg-base-200/40 border border-base-content/10 text-xs opacity-90 max-w-3xl">
<p class="mb-2">
<strong>FastEmbed:</strong> The running process keeps the model loaded until restart. Changing to a model with a
different dimension re-embeds all stored vectors on the next worker/server startup.
</p>
<p>
Same-dimension model swaps update settings only; existing vectors are not automatically regenerated until you
change dimension (or re-embed via the OpenAI workaround described in ops docs).
</p>
</div>
<div id="fastembed-change-alert" class="nb-panel p-3 bg-warning/20 hidden">
<div class="text-sm">
<strong>Warning:</strong> You changed the FastEmbed model. Save, then restart the worker or server to apply.
If the dimension changed, stored embeddings and HNSW indexes will be rebuilt on startup.
</div>
</div>
{% endif %}
{% if effective_embedding_backend != "fastembed" and effective_embedding_backend != "hashed" %}
<div class="nb-panel p-3 bg-base-200/40 border border-base-content/10 text-xs opacity-90 max-w-3xl">
<p class="mb-2">
<strong>Re-embedding stored data:</strong> Only a change to <span class="font-mono">embedding_dimensions</span>
followed by a restart triggers a full re-embed of text chunks and knowledge entities. Changing the embedding model alone
does <em>not</em> update vectors already in the database.
</p>
<p>
To force a full re-embed (for example after switching models), save a <em>different</em> dimension integer, restart the
worker, then set the final dimension and model and restart again if needed.
</p>
</div>
<div id="embedding-change-alert" class="nb-panel p-3 bg-warning/20 hidden">
<div class="text-sm">
<strong>Warning:</strong> Changing dimensions recreates embeddings for text chunks and knowledge entities. Confirm the target model requires the new value.
<strong>Warning:</strong> You changed embedding dimensions. Save, then restart the worker or server so stored embeddings
and HNSW indexes are rebuilt. Until then, search may use the old dimension.
</div>
</div>
{% endif %}
@@ -137,7 +203,26 @@
</div>
</form>
{% if settings.embedding_backend != "fastembed" and settings.embedding_backend != "hashed" %}
{% if effective_embedding_backend == "fastembed" and not fastembed_model_locked_by_config %}
<script>
// Shows the restart warning while the selected FastEmbed model differs from the model
// the page was rendered with, and hides it again when the selection is reverted.
(() => {
  const select = document.getElementById('fastembed_model_select');
  const warning = document.getElementById('fastembed-change-alert');
  const initialModel = '{{ settings.embedding_model }}';

  // Nothing to wire up when either element is absent from the page.
  if (!select || !warning) {
    return;
  }

  select.addEventListener('change', (event) => {
    const unchanged = String(event.target.value) === String(initialModel);
    warning.classList.toggle('hidden', unchanged);
  });
})();
</script>
{% endif %}
{% if effective_embedding_backend != "fastembed" and effective_embedding_backend != "hashed" %}
<script>
(() => {
const dimensionInput = document.getElementById('embedding_dimensions');