chore: ingestion-pipeline refactor, sort technical debt, rustfmt

This commit is contained in:
Per Stark
2026-05-31 19:37:34 +02:00
parent 5c2d2e24d3
commit 3897345ab3
47 changed files with 1729 additions and 1343 deletions
+20 -7
View File
@@ -14,7 +14,7 @@ use common::{
utils::embedding::EmbeddingProvider,
};
use retrieval_pipeline::{
pipeline::{StageTimings, RetrievalConfig},
pipeline::{RetrievalConfig, StageTimings},
reranking::RerankerPool,
};
@@ -122,11 +122,15 @@ impl<'a> EvaluationContext<'a> {
}
pub fn slice(&self) -> Result<&slice::ResolvedSlice<'a>> {
self.slice.as_ref().ok_or_else(|| anyhow!("slice has not been prepared"))
self.slice
.as_ref()
.ok_or_else(|| anyhow!("slice has not been prepared"))
}
pub fn db(&self) -> Result<&SurrealDbClient> {
self.db.as_ref().ok_or_else(|| anyhow!("database connection missing"))
self.db
.as_ref()
.ok_or_else(|| anyhow!("database connection missing"))
}
pub fn descriptor(&self) -> Result<&snapshot::Descriptor> {
@@ -142,15 +146,23 @@ impl<'a> EvaluationContext<'a> {
}
pub fn openai_client(&self) -> Result<Arc<Client<async_openai::config::OpenAIConfig>>> {
Ok(Arc::clone(self.openai_client.as_ref().ok_or_else(|| anyhow!("openai client missing"))?))
Ok(Arc::clone(
self.openai_client
.as_ref()
.ok_or_else(|| anyhow!("openai client missing"))?,
))
}
pub fn corpus_handle(&self) -> Result<&corpus::CorpusHandle> {
self.corpus_handle.as_ref().ok_or_else(|| anyhow!("corpus handle missing"))
self.corpus_handle
.as_ref()
.ok_or_else(|| anyhow!("corpus handle missing"))
}
pub fn evaluation_user(&self) -> Result<&User> {
self.eval_user.as_ref().ok_or_else(|| anyhow!("evaluation user missing"))
self.eval_user
.as_ref()
.ok_or_else(|| anyhow!("evaluation user missing"))
}
#[allow(clippy::arithmetic_side_effects)]
@@ -168,7 +180,8 @@ impl<'a> EvaluationContext<'a> {
}
pub fn into_summary(self) -> Result<EvaluationSummary> {
self.summary.ok_or_else(|| anyhow!("evaluation summary missing"))
self.summary
.ok_or_else(|| anyhow!("evaluation summary missing"))
}
}
+15 -16
View File
@@ -10,7 +10,7 @@ use crate::eval::{
CaseSummary, RetrievedSummary,
};
use retrieval_pipeline::{
pipeline::{self, StageTimings, RetrievalConfig},
pipeline::{self, RetrievalConfig, StageTimings},
reranking::RerankerPool,
};
use tokio::sync::Semaphore;
@@ -169,10 +169,10 @@ pub(crate) async fn run_queries(
let query_start = Instant::now();
debug!(question_id = %question_id, "Evaluating query");
let query_embedding =
embedding_provider.embed(&question).await.with_context(|| {
format!("generating embedding for question {question_id}")
})?;
let query_embedding = embedding_provider
.embed(&question)
.await
.with_context(|| format!("generating embedding for question {question_id}"))?;
let reranker = match rerank_pool.as_ref() {
Some(pool) => pool.checkout().await,
None => None,
@@ -204,8 +204,10 @@ pub(crate) async fn run_queries(
let mut match_rank = None;
let answers_lower: Vec<String> =
answers.iter().map(|ans| ans.to_ascii_lowercase()).collect();
let expected_chunk_ids_set: HashSet<&str> =
expected_chunk_ids.iter().map(std::string::String::as_str).collect();
let expected_chunk_ids_set: HashSet<&str> = expected_chunk_ids
.iter()
.map(std::string::String::as_str)
.collect();
let chunk_id_required = has_verified_chunks;
let mut entity_hit = false;
let mut chunk_text_hit = false;
@@ -304,15 +306,12 @@ pub(crate) async fn run_queries(
None
};
Ok::<
(
usize,
CaseSummary,
Option<CaseDiagnostics>,
StageTimings,
),
anyhow::Error,
>((idx, summary, diagnostics, stage_timings))
Ok::<(usize, CaseSummary, Option<CaseDiagnostics>, StageTimings), anyhow::Error>((
idx,
summary,
diagnostics,
stage_timings,
))
}
})
.buffer_unordered(concurrency)
+5 -1
View File
@@ -13,7 +13,11 @@ use super::super::{
};
use super::{map_guard_error, StageResult};
#[allow(clippy::too_many_lines, clippy::arithmetic_side_effects, clippy::cast_precision_loss)]
#[allow(
clippy::too_many_lines,
clippy::arithmetic_side_effects,
clippy::cast_precision_loss
)]
pub(crate) async fn summarize(
machine: EvaluationMachine<(), QueriesFinished>,
ctx: &mut EvaluationContext<'_>,