mirror of
https://github.com/perstarkse/minne.git
synced 2026-06-27 12:26:21 +02:00
chore: ingestion-pipeline refactor, sort technical debt, rustfmt
This commit is contained in:
@@ -14,7 +14,7 @@ use common::{
|
||||
utils::embedding::EmbeddingProvider,
|
||||
};
|
||||
use retrieval_pipeline::{
|
||||
pipeline::{StageTimings, RetrievalConfig},
|
||||
pipeline::{RetrievalConfig, StageTimings},
|
||||
reranking::RerankerPool,
|
||||
};
|
||||
|
||||
@@ -122,11 +122,15 @@ impl<'a> EvaluationContext<'a> {
|
||||
}
|
||||
|
||||
pub fn slice(&self) -> Result<&slice::ResolvedSlice<'a>> {
|
||||
self.slice.as_ref().ok_or_else(|| anyhow!("slice has not been prepared"))
|
||||
self.slice
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("slice has not been prepared"))
|
||||
}
|
||||
|
||||
pub fn db(&self) -> Result<&SurrealDbClient> {
|
||||
self.db.as_ref().ok_or_else(|| anyhow!("database connection missing"))
|
||||
self.db
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("database connection missing"))
|
||||
}
|
||||
|
||||
pub fn descriptor(&self) -> Result<&snapshot::Descriptor> {
|
||||
@@ -142,15 +146,23 @@ impl<'a> EvaluationContext<'a> {
|
||||
}
|
||||
|
||||
pub fn openai_client(&self) -> Result<Arc<Client<async_openai::config::OpenAIConfig>>> {
|
||||
Ok(Arc::clone(self.openai_client.as_ref().ok_or_else(|| anyhow!("openai client missing"))?))
|
||||
Ok(Arc::clone(
|
||||
self.openai_client
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("openai client missing"))?,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn corpus_handle(&self) -> Result<&corpus::CorpusHandle> {
|
||||
self.corpus_handle.as_ref().ok_or_else(|| anyhow!("corpus handle missing"))
|
||||
self.corpus_handle
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("corpus handle missing"))
|
||||
}
|
||||
|
||||
pub fn evaluation_user(&self) -> Result<&User> {
|
||||
self.eval_user.as_ref().ok_or_else(|| anyhow!("evaluation user missing"))
|
||||
self.eval_user
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("evaluation user missing"))
|
||||
}
|
||||
|
||||
#[allow(clippy::arithmetic_side_effects)]
|
||||
@@ -168,7 +180,8 @@ impl<'a> EvaluationContext<'a> {
|
||||
}
|
||||
|
||||
pub fn into_summary(self) -> Result<EvaluationSummary> {
|
||||
self.summary.ok_or_else(|| anyhow!("evaluation summary missing"))
|
||||
self.summary
|
||||
.ok_or_else(|| anyhow!("evaluation summary missing"))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ use crate::eval::{
|
||||
CaseSummary, RetrievedSummary,
|
||||
};
|
||||
use retrieval_pipeline::{
|
||||
pipeline::{self, StageTimings, RetrievalConfig},
|
||||
pipeline::{self, RetrievalConfig, StageTimings},
|
||||
reranking::RerankerPool,
|
||||
};
|
||||
use tokio::sync::Semaphore;
|
||||
@@ -169,10 +169,10 @@ pub(crate) async fn run_queries(
|
||||
let query_start = Instant::now();
|
||||
|
||||
debug!(question_id = %question_id, "Evaluating query");
|
||||
let query_embedding =
|
||||
embedding_provider.embed(&question).await.with_context(|| {
|
||||
format!("generating embedding for question {question_id}")
|
||||
})?;
|
||||
let query_embedding = embedding_provider
|
||||
.embed(&question)
|
||||
.await
|
||||
.with_context(|| format!("generating embedding for question {question_id}"))?;
|
||||
let reranker = match rerank_pool.as_ref() {
|
||||
Some(pool) => pool.checkout().await,
|
||||
None => None,
|
||||
@@ -204,8 +204,10 @@ pub(crate) async fn run_queries(
|
||||
let mut match_rank = None;
|
||||
let answers_lower: Vec<String> =
|
||||
answers.iter().map(|ans| ans.to_ascii_lowercase()).collect();
|
||||
let expected_chunk_ids_set: HashSet<&str> =
|
||||
expected_chunk_ids.iter().map(std::string::String::as_str).collect();
|
||||
let expected_chunk_ids_set: HashSet<&str> = expected_chunk_ids
|
||||
.iter()
|
||||
.map(std::string::String::as_str)
|
||||
.collect();
|
||||
let chunk_id_required = has_verified_chunks;
|
||||
let mut entity_hit = false;
|
||||
let mut chunk_text_hit = false;
|
||||
@@ -304,15 +306,12 @@ pub(crate) async fn run_queries(
|
||||
None
|
||||
};
|
||||
|
||||
Ok::<
|
||||
(
|
||||
usize,
|
||||
CaseSummary,
|
||||
Option<CaseDiagnostics>,
|
||||
StageTimings,
|
||||
),
|
||||
anyhow::Error,
|
||||
>((idx, summary, diagnostics, stage_timings))
|
||||
Ok::<(usize, CaseSummary, Option<CaseDiagnostics>, StageTimings), anyhow::Error>((
|
||||
idx,
|
||||
summary,
|
||||
diagnostics,
|
||||
stage_timings,
|
||||
))
|
||||
}
|
||||
})
|
||||
.buffer_unordered(concurrency)
|
||||
|
||||
@@ -13,7 +13,11 @@ use super::super::{
|
||||
};
|
||||
use super::{map_guard_error, StageResult};
|
||||
|
||||
#[allow(clippy::too_many_lines, clippy::arithmetic_side_effects, clippy::cast_precision_loss)]
|
||||
#[allow(
|
||||
clippy::too_many_lines,
|
||||
clippy::arithmetic_side_effects,
|
||||
clippy::cast_precision_loss
|
||||
)]
|
||||
pub(crate) async fn summarize(
|
||||
machine: EvaluationMachine<(), QueriesFinished>,
|
||||
ctx: &mut EvaluationContext<'_>,
|
||||
|
||||
Reference in New Issue
Block a user