feat: pool fastembed, batch embeddings, and reconcile embedding config on startup

This commit is contained in:
Per Stark
2026-06-03 22:10:33 +02:00
parent 7b850769c9
commit 15c9f18f6e
24 changed files with 565 additions and 546 deletions
@@ -11,7 +11,7 @@ use crate::{
},
openai,
};
use common::utils::embedding::EmbeddingProvider;
use common::utils::embedding::{default_embedding_pool_size, EmbeddingProvider};
use super::super::{
context::{EvalStage, EvaluationContext},
@@ -43,9 +43,12 @@ pub(crate) async fn prepare_db(
// Create embedding provider directly from config (eval only supports FastEmbed and Hashed)
let embedding_provider = match config.embedding_backend {
crate::args::EmbeddingBackend::FastEmbed => {
EmbeddingProvider::new_fastembed(config.embedding_model.clone())
.await
.context("creating FastEmbed provider")?
EmbeddingProvider::new_fastembed(
config.embedding_model.clone(),
default_embedding_pool_size(),
)
.await
.context("creating FastEmbed provider")?
}
crate::args::EmbeddingBackend::Hashed => {
EmbeddingProvider::new_hashed(1536).context("creating Hashed provider")?
@@ -136,12 +136,10 @@ pub(crate) async fn run_queries(
let embedding_provider_for_queries = ctx.embedding_provider()?.clone();
let rerank_pool_for_queries = rerank_pool.clone();
let db = ctx.db()?.clone();
let openai_client = ctx.openai_client()?;
let raw_results = stream::iter(cases_iter)
.map(move |(idx, case)| {
let db = db.clone();
let openai_client = Arc::clone(&openai_client);
let user_id = user_id.clone();
let retrieval_config = Arc::clone(&retrieval_config);
let embedding_provider = embedding_provider_for_queries.clone();
@@ -180,8 +178,7 @@ pub(crate) async fn run_queries(
let params = pipeline::RetrievalParams {
db_client: &db,
openai_client: &openai_client,
embedding_provider: Some(&embedding_provider),
embedding_provider: &embedding_provider,
input_text: &question,
user_id: &user_id,
config: (*retrieval_config).clone(),