feat: pool fastembed, batch embeddings, and reconcile embedding config on startup

This commit is contained in:
Per Stark
2026-06-03 22:10:33 +02:00
parent 7b850769c9
commit 15c9f18f6e
24 changed files with 565 additions and 546 deletions
+10 -40
View File
@@ -1,5 +1,3 @@
use std::sync::Arc;
use async_openai::types::ListModelResponse;
use axum::{
extract::{Query, State},
@@ -11,17 +9,15 @@ use common::{
error::AppError,
storage::types::{
analytics::Analytics,
knowledge_entity::KnowledgeEntity,
system_prompts::{
DEFAULT_IMAGE_PROCESSING_PROMPT, DEFAULT_INGRESS_ANALYSIS_SYSTEM_PROMPT,
DEFAULT_QUERY_SYSTEM_PROMPT,
},
system_settings::{SystemSettings, SystemSettingsPatch},
text_chunk::TextChunk,
},
utils::embedding::EmbeddingBackend,
};
use tracing::{error, info};
use tracing::info;
use crate::{
html_state::HtmlState,
@@ -209,41 +205,15 @@ pub async fn update_model_settings(
.await?;
if reembedding_needed {
info!("Embedding dimensions changed. Spawning background re-embedding task...");
let db_for_task = Arc::clone(&state.db);
let openai_for_task = Arc::clone(&state.openai_client);
let new_model_for_task = new_settings.embedding_model.clone();
let new_dims_for_task = new_settings.embedding_dimensions;
tokio::spawn(async move {
// First, update all text chunks
if let Err(e) = TextChunk::update_all_embeddings(
&db_for_task,
&openai_for_task,
&new_model_for_task,
new_dims_for_task,
)
.await
{
error!("Background re-embedding task failed for TextChunks: {}", e);
}
// Second, update all knowledge entities
if let Err(e) = KnowledgeEntity::update_all_embeddings(
&db_for_task,
&openai_for_task,
&new_model_for_task,
new_dims_for_task,
)
.await
{
error!(
"Background re-embedding task failed for KnowledgeEntities: {}",
e
);
}
});
// Re-embedding is owned by startup (the worker/combined binary), not by the admin request.
// Re-embedding inline here would leave the live embedding provider, which is built at startup,
// embedding queries at the old dimension while stored vectors move to the new one, so retrieval
// would be broken until restart. Persisting the new settings is enough: on the next restart the
// maintainer detects the index/dimension mismatch and re-embeds before rebuilding indexes.
info!(
new_dimensions = new_settings.embedding_dimensions,
"Embedding dimensions changed; restart the worker/server to re-embed and apply"
);
}
let available_models = state
@@ -359,8 +359,7 @@ async fn prepare_chat_request(
let retrieval_result = match retrieval_pipeline::retrieve(
&state.db,
&state.openai_client,
Some(&*state.embedding_provider),
&state.embedding_provider,
&user_message.content,
&user.id,
config,
+7 -5
View File
@@ -24,7 +24,7 @@ use common::{
user::User,
},
},
utils::embedding::{generate_embedding_with_provider, EmbeddingProvider},
utils::embedding::EmbeddingProvider,
};
use retrieval_pipeline::{
normalize_fts_terms, reciprocal_rank_fusion, RetrievalTuning, RrfConfig, Scored,
@@ -187,8 +187,11 @@ pub async fn create_knowledge_entity(
let embedding_input =
format!("name: {name}, description: {description}, type: {entity_type:?}");
let embedding =
generate_embedding_with_provider(&state.embedding_provider, &embedding_input).await?;
let embedding = state
.embedding_provider
.embed(&embedding_input)
.await
.map_err(AppError::from)?;
let source_id = format!("manual::{}", Uuid::new_v4());
let new_entity = KnowledgeEntity::new(
@@ -373,8 +376,7 @@ async fn suggest_related_entities(
"name: {}, description: {}, type: {:?}",
draft.name, draft.description, draft.entity_type
);
let embedding =
generate_embedding_with_provider(embedding_provider, &embedding_input).await?;
let embedding = embedding_provider.embed(&embedding_input).await?;
let take = MAX_RELATIONSHIP_SUGGESTIONS * 2;
let tuning = RetrievalTuning::default();
+1 -2
View File
@@ -171,8 +171,7 @@ async fn perform_search(
let result = retrieve(
&state.db,
&state.openai_client,
Some(&state.embedding_provider),
&state.embedding_provider,
trimmed_query,
&user.id,
config,