benchmarks: v1

Benchmarking ingestion, retrieval precision and performance
This commit is contained in:
Per Stark
2025-11-04 11:22:45 +01:00
parent 112a6965a4
commit 0eda65b07e
46 changed files with 8407 additions and 144 deletions
+41 -1
View File
@@ -1,8 +1,14 @@
use std::ops::Range;
use common::{
error::AppError,
storage::{
db::SurrealDbClient,
types::{ingestion_task::IngestionTask, text_content::TextContent},
types::{
ingestion_task::IngestionTask, knowledge_entity::KnowledgeEntity,
knowledge_relationship::KnowledgeRelationship, text_chunk::TextChunk,
text_content::TextContent,
},
},
};
use composite_retrieval::RetrievedEntity;
@@ -24,6 +30,14 @@ pub struct PipelineContext<'a> {
pub analysis: Option<LLMEnrichmentResult>,
}
/// The outputs the ingestion pipeline prepares for a task before anything is stored.
///
/// Built by `PipelineContext::build_artifacts`; the `persist` stage destructures
/// this value and writes each part to the database.
#[derive(Debug)]
pub struct PipelineArtifacts {
/// The text content taken out of the pipeline context.
pub text_content: TextContent,
/// Knowledge entities returned by the services' `convert_analysis` call.
pub entities: Vec<KnowledgeEntity>,
/// Relationships returned alongside `entities` by the same `convert_analysis` call.
pub relationships: Vec<KnowledgeRelationship>,
/// Chunks returned by the services' `prepare_chunks` call for `text_content`.
pub chunks: Vec<TextChunk>,
}
impl<'a> PipelineContext<'a> {
pub fn new(
task: &'a IngestionTask,
@@ -73,4 +87,30 @@ impl<'a> PipelineContext<'a> {
);
err
}
/// Assembles the [`PipelineArtifacts`] for the task held by this context.
///
/// Takes the text content and the analysis out of the context, has the
/// services convert the analysis into entities and relationships, and then has
/// them prepare chunks for the content using the configured
/// `chunk_min_chars..chunk_max_chars` range. The two service calls are awaited
/// one after the other.
///
/// # Errors
///
/// Propagates the first error returned by `take_text_content`,
/// `take_analysis`, `convert_analysis` or `prepare_chunks`.
pub async fn build_artifacts(&mut self) -> Result<PipelineArtifacts, AppError> {
    let text_content = self.take_text_content()?;
    let enrichment = self.take_analysis()?;

    // Read the plain config value up front so no config borrow is held across the await.
    let embedding_concurrency = self.pipeline_config.tuning.entity_embedding_concurrency;
    let (entities, relationships) = self
        .services
        .convert_analysis(&text_content, &enrichment, embedding_concurrency)
        .await?;

    let chunk_bounds = Range::<usize> {
        start: self.pipeline_config.tuning.chunk_min_chars,
        end: self.pipeline_config.tuning.chunk_max_chars,
    };
    let chunks = self
        .services
        .prepare_chunks(&text_content, chunk_bounds)
        .await?;

    Ok(PipelineArtifacts {
        text_content,
        entities,
        relationships,
        chunks,
    })
}
}
+29 -1
View File
@@ -7,6 +7,7 @@ mod stages;
mod state;
pub use config::{IngestionConfig, IngestionTuning};
pub use enrichment_result::{LLMEnrichmentResult, LLMKnowledgeEntity, LLMRelationship};
pub use services::{DefaultPipelineServices, PipelineServices};
use std::{
@@ -31,7 +32,7 @@ use composite_retrieval::reranking::RerankerPool;
use tracing::{debug, info, warn};
use self::{
context::PipelineContext,
context::{PipelineArtifacts, PipelineContext},
stages::{enrich, persist, prepare_content, retrieve_related},
state::ready,
};
@@ -224,6 +225,33 @@ impl IngestionPipeline {
Ok(())
}
/// Runs the ingestion stages that come before persistence and returns what they prepared.
///
/// The task's content is cloned and driven through `prepare_content`,
/// `retrieve_related` and `enrich`, in that order, on a fresh
/// [`PipelineContext`]. The artifacts are then built from that context; the
/// `persist` stage is never invoked here.
///
/// # Errors
///
/// If any stage or the final artifact build fails, the error is handed to
/// `PipelineContext::abort` and the value it returns is the error of this call.
pub async fn produce_artifacts(
    &self,
    task: &IngestionTask,
) -> Result<PipelineArtifacts, AppError> {
    let payload = task.content.clone();
    let mut ctx = PipelineContext::new(
        task,
        self.db.as_ref(),
        &self.pipeline_config,
        self.services.as_ref(),
    );

    let prepared = match prepare_content(ready(), &mut ctx, payload).await {
        Ok(next) => next,
        Err(failure) => return Err(ctx.abort(failure)),
    };

    let retrieved = match retrieve_related(prepared, &mut ctx).await {
        Ok(next) => next,
        Err(failure) => return Err(ctx.abort(failure)),
    };

    // The enriched state machine is not needed further; it is only kept alive
    // until the end of the function, as before.
    let _enriched = match enrich(retrieved, &mut ctx).await {
        Ok(next) => next,
        Err(failure) => return Err(ctx.abort(failure)),
    };

    ctx.build_artifacts()
        .await
        .map_err(|failure| ctx.abort(failure))
}
}
#[cfg(test)]
+24 -2
View File
@@ -29,6 +29,8 @@ use crate::utils::llm_instructions::{
get_ingress_analysis_schema, INGRESS_ANALYSIS_SYSTEM_MESSAGE,
};
/// Maximum number of characters of a content's text that is placed into the
/// input string built for related-entity retrieval; longer text is cut down to
/// this many characters with `truncate_for_embedding`.
const EMBEDDING_QUERY_CHAR_LIMIT: usize = 12_000;
#[async_trait]
pub trait PipelineServices: Send + Sync {
async fn prepare_text_content(
@@ -162,9 +164,13 @@ impl PipelineServices for DefaultPipelineServices {
&self,
content: &TextContent,
) -> Result<Vec<RetrievedEntity>, AppError> {
let truncated_body = truncate_for_embedding(&content.text, EMBEDDING_QUERY_CHAR_LIMIT);
let input_text = format!(
"content: {}, category: {}, user_context: {:?}",
content.text, content.category, content.context
"content: {}\n[truncated={}], category: {}, user_context: {:?}",
truncated_body,
truncated_body.len() < content.text.len(),
content.category,
content.context
);
let rerank_lease = match &self.reranker_pool {
@@ -239,3 +245,19 @@ impl PipelineServices for DefaultPipelineServices {
Ok(chunks)
}
}
/// Returns at most the first `max_chars` characters of `text` as an owned string.
///
/// Counting is done in Unicode scalar values (`char`s), not bytes, so the cut
/// never lands inside a multi-byte character. When `text` has `max_chars`
/// characters or fewer it is returned unchanged. No truncation marker is
/// appended, so a result shorter than `text` means the text was cut.
fn truncate_for_embedding(text: &str, max_chars: usize) -> String {
    // The byte offset of the character at index `max_chars` (if there is one)
    // is exactly where the kept prefix ends. This stops scanning as soon as
    // the limit is reached instead of counting every character of the input.
    match text.char_indices().nth(max_chars) {
        Some((cut_at, _)) => text[..cut_at].to_string(),
        None => text.to_string(),
    }
}
+13 -29
View File
@@ -7,7 +7,6 @@ use common::{
types::{
ingestion_payload::IngestionPayload, knowledge_entity::KnowledgeEntity,
knowledge_relationship::KnowledgeRelationship, text_chunk::TextChunk,
text_content::TextContent,
},
},
};
@@ -16,8 +15,7 @@ use tokio::time::{sleep, Duration};
use tracing::{debug, instrument, warn};
use super::{
context::PipelineContext,
services::PipelineServices,
context::{PipelineArtifacts, PipelineContext},
state::{ContentPrepared, Enriched, IngestionMachine, Persisted, Ready, Retrieved},
};
@@ -134,37 +132,26 @@ pub async fn persist(
machine: IngestionMachine<(), Enriched>,
ctx: &mut PipelineContext<'_>,
) -> Result<IngestionMachine<(), Persisted>, AppError> {
let content = ctx.take_text_content()?;
let analysis = ctx.take_analysis()?;
let (entities, relationships) = ctx
.services
.convert_analysis(
&content,
&analysis,
ctx.pipeline_config.tuning.entity_embedding_concurrency,
)
.await?;
let PipelineArtifacts {
text_content,
entities,
relationships,
chunks,
} = ctx.build_artifacts().await?;
let entity_count = entities.len();
let relationship_count = relationships.len();
let chunk_range =
ctx.pipeline_config.tuning.chunk_min_chars..ctx.pipeline_config.tuning.chunk_max_chars;
let ((), chunk_count) = tokio::try_join!(
store_graph_entities(ctx.db, &ctx.pipeline_config.tuning, entities, relationships),
store_vector_chunks(
ctx.db,
ctx.services,
ctx.task_id.as_str(),
&content,
chunk_range,
&chunks,
&ctx.pipeline_config.tuning
)
)?;
ctx.db.store_item(content).await?;
ctx.db.store_item(text_content).await?;
ctx.db.rebuild_indexes().await?;
debug!(
@@ -252,17 +239,14 @@ async fn store_graph_entities(
async fn store_vector_chunks(
db: &SurrealDbClient,
services: &dyn PipelineServices,
task_id: &str,
content: &TextContent,
chunk_range: std::ops::Range<usize>,
chunks: &[TextChunk],
tuning: &super::config::IngestionTuning,
) -> Result<usize, AppError> {
let prepared_chunks = services.prepare_chunks(content, chunk_range).await?;
let chunk_count = prepared_chunks.len();
let chunk_count = chunks.len();
let batch_size = tuning.chunk_insert_concurrency.max(1);
for chunk in &prepared_chunks {
for chunk in chunks {
debug!(
task_id = %task_id,
chunk_id = %chunk.id,
@@ -271,7 +255,7 @@ async fn store_vector_chunks(
);
}
for batch in prepared_chunks.chunks(batch_size) {
for batch in chunks.chunks(batch_size) {
store_chunk_batch(db, batch, tuning).await?;
}