mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-27 18:09:28 +02:00
benchmarks: v1
Benchmarking ingestion, retrieval precision and performance
This commit is contained in:
@@ -1,8 +1,14 @@
|
||||
use std::ops::Range;
|
||||
|
||||
use common::{
|
||||
error::AppError,
|
||||
storage::{
|
||||
db::SurrealDbClient,
|
||||
types::{ingestion_task::IngestionTask, text_content::TextContent},
|
||||
types::{
|
||||
ingestion_task::IngestionTask, knowledge_entity::KnowledgeEntity,
|
||||
knowledge_relationship::KnowledgeRelationship, text_chunk::TextChunk,
|
||||
text_content::TextContent,
|
||||
},
|
||||
},
|
||||
};
|
||||
use composite_retrieval::RetrievedEntity;
|
||||
@@ -24,6 +30,14 @@ pub struct PipelineContext<'a> {
|
||||
pub analysis: Option<LLMEnrichmentResult>,
|
||||
}
|
||||
|
||||
/// Everything the ingestion pipeline prepares for a task before the persistence step.
///
/// Built by `PipelineContext::build_artifacts`.
#[derive(Debug)]
|
||||
pub struct PipelineArtifacts {
|
||||
/// The text content taken out of the pipeline context for this task.
pub text_content: TextContent,
|
||||
/// Knowledge entities returned by the services' `convert_analysis` call.
pub entities: Vec<KnowledgeEntity>,
|
||||
/// Relationships returned alongside `entities` by `convert_analysis`.
pub relationships: Vec<KnowledgeRelationship>,
|
||||
/// Chunks returned by the services' `prepare_chunks` call for `text_content`.
pub chunks: Vec<TextChunk>,
|
||||
}
|
||||
|
||||
impl<'a> PipelineContext<'a> {
|
||||
pub fn new(
|
||||
task: &'a IngestionTask,
|
||||
@@ -73,4 +87,30 @@ impl<'a> PipelineContext<'a> {
|
||||
);
|
||||
err
|
||||
}
|
||||
|
||||
pub async fn build_artifacts(&mut self) -> Result<PipelineArtifacts, AppError> {
|
||||
let content = self.take_text_content()?;
|
||||
let analysis = self.take_analysis()?;
|
||||
|
||||
let (entities, relationships) = self
|
||||
.services
|
||||
.convert_analysis(
|
||||
&content,
|
||||
&analysis,
|
||||
self.pipeline_config.tuning.entity_embedding_concurrency,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let chunk_range: Range<usize> = self.pipeline_config.tuning.chunk_min_chars
|
||||
..self.pipeline_config.tuning.chunk_max_chars;
|
||||
|
||||
let chunks = self.services.prepare_chunks(&content, chunk_range).await?;
|
||||
|
||||
Ok(PipelineArtifacts {
|
||||
text_content: content,
|
||||
entities,
|
||||
relationships,
|
||||
chunks,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ mod stages;
|
||||
mod state;
|
||||
|
||||
pub use config::{IngestionConfig, IngestionTuning};
|
||||
pub use enrichment_result::{LLMEnrichmentResult, LLMKnowledgeEntity, LLMRelationship};
|
||||
pub use services::{DefaultPipelineServices, PipelineServices};
|
||||
|
||||
use std::{
|
||||
@@ -31,7 +32,7 @@ use composite_retrieval::reranking::RerankerPool;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use self::{
|
||||
context::PipelineContext,
|
||||
context::{PipelineArtifacts, PipelineContext},
|
||||
stages::{enrich, persist, prepare_content, retrieve_related},
|
||||
state::ready,
|
||||
};
|
||||
@@ -224,6 +225,33 @@ impl IngestionPipeline {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Runs the ingestion pipeline up to (but excluding) persistence and returns the prepared artifacts.
|
||||
pub async fn produce_artifacts(
|
||||
&self,
|
||||
task: &IngestionTask,
|
||||
) -> Result<PipelineArtifacts, AppError> {
|
||||
let payload = task.content.clone();
|
||||
let mut ctx = PipelineContext::new(
|
||||
task,
|
||||
self.db.as_ref(),
|
||||
&self.pipeline_config,
|
||||
self.services.as_ref(),
|
||||
);
|
||||
|
||||
let machine = ready();
|
||||
let machine = prepare_content(machine, &mut ctx, payload)
|
||||
.await
|
||||
.map_err(|err| ctx.abort(err))?;
|
||||
let machine = retrieve_related(machine, &mut ctx)
|
||||
.await
|
||||
.map_err(|err| ctx.abort(err))?;
|
||||
let _machine = enrich(machine, &mut ctx)
|
||||
.await
|
||||
.map_err(|err| ctx.abort(err))?;
|
||||
|
||||
ctx.build_artifacts().await.map_err(|err| ctx.abort(err))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -29,6 +29,8 @@ use crate::utils::llm_instructions::{
|
||||
get_ingress_analysis_schema, INGRESS_ANALYSIS_SYSTEM_MESSAGE,
|
||||
};
|
||||
|
||||
/// Maximum number of characters of `content.text` kept when it is passed to
/// `truncate_for_embedding` while building the query input text; longer text is cut.
const EMBEDDING_QUERY_CHAR_LIMIT: usize = 12_000;
|
||||
|
||||
#[async_trait]
|
||||
pub trait PipelineServices: Send + Sync {
|
||||
async fn prepare_text_content(
|
||||
@@ -162,9 +164,13 @@ impl PipelineServices for DefaultPipelineServices {
|
||||
&self,
|
||||
content: &TextContent,
|
||||
) -> Result<Vec<RetrievedEntity>, AppError> {
|
||||
let truncated_body = truncate_for_embedding(&content.text, EMBEDDING_QUERY_CHAR_LIMIT);
|
||||
let input_text = format!(
|
||||
"content: {}, category: {}, user_context: {:?}",
|
||||
content.text, content.category, content.context
|
||||
"content: {}\n[truncated={}], category: {}, user_context: {:?}",
|
||||
truncated_body,
|
||||
truncated_body.len() < content.text.len(),
|
||||
content.category,
|
||||
content.context
|
||||
);
|
||||
|
||||
let rerank_lease = match &self.reranker_pool {
|
||||
@@ -239,3 +245,19 @@ impl PipelineServices for DefaultPipelineServices {
|
||||
Ok(chunks)
|
||||
}
|
||||
}
|
||||
|
||||
/// Limits `text` to at most `max_chars` characters (Unicode scalar values).
///
/// Returns `text` unchanged when it already fits. Otherwise returns the first `max_chars`
/// characters followed by a single `…` marker, so a truncated result is `max_chars + 1`
/// characters long. The cut always falls on a character boundary.
///
/// Note: the marker is three bytes in UTF-8, so a truncated result can be *longer in bytes*
/// than the input. Compare character counts, not `len()`, to detect truncation.
fn truncate_for_embedding(text: &str, max_chars: usize) -> String {
    // Locate the byte offset of the first character that does not fit. This stops scanning
    // as soon as the limit is reached instead of walking the whole (possibly huge) input.
    match text.char_indices().nth(max_chars) {
        // Fewer than or exactly `max_chars` characters: nothing to cut.
        None => text.to_string(),
        Some((cut, _)) => {
            // Capacity is in bytes: the kept prefix plus the ellipsis.
            let mut truncated = String::with_capacity(cut + '…'.len_utf8());
            truncated.push_str(&text[..cut]);
            truncated.push('…');
            truncated
        }
    }
}
|
||||
|
||||
@@ -7,7 +7,6 @@ use common::{
|
||||
types::{
|
||||
ingestion_payload::IngestionPayload, knowledge_entity::KnowledgeEntity,
|
||||
knowledge_relationship::KnowledgeRelationship, text_chunk::TextChunk,
|
||||
text_content::TextContent,
|
||||
},
|
||||
},
|
||||
};
|
||||
@@ -16,8 +15,7 @@ use tokio::time::{sleep, Duration};
|
||||
use tracing::{debug, instrument, warn};
|
||||
|
||||
use super::{
|
||||
context::PipelineContext,
|
||||
services::PipelineServices,
|
||||
context::{PipelineArtifacts, PipelineContext},
|
||||
state::{ContentPrepared, Enriched, IngestionMachine, Persisted, Ready, Retrieved},
|
||||
};
|
||||
|
||||
@@ -134,37 +132,26 @@ pub async fn persist(
|
||||
machine: IngestionMachine<(), Enriched>,
|
||||
ctx: &mut PipelineContext<'_>,
|
||||
) -> Result<IngestionMachine<(), Persisted>, AppError> {
|
||||
let content = ctx.take_text_content()?;
|
||||
let analysis = ctx.take_analysis()?;
|
||||
|
||||
let (entities, relationships) = ctx
|
||||
.services
|
||||
.convert_analysis(
|
||||
&content,
|
||||
&analysis,
|
||||
ctx.pipeline_config.tuning.entity_embedding_concurrency,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let PipelineArtifacts {
|
||||
text_content,
|
||||
entities,
|
||||
relationships,
|
||||
chunks,
|
||||
} = ctx.build_artifacts().await?;
|
||||
let entity_count = entities.len();
|
||||
let relationship_count = relationships.len();
|
||||
|
||||
let chunk_range =
|
||||
ctx.pipeline_config.tuning.chunk_min_chars..ctx.pipeline_config.tuning.chunk_max_chars;
|
||||
|
||||
let ((), chunk_count) = tokio::try_join!(
|
||||
store_graph_entities(ctx.db, &ctx.pipeline_config.tuning, entities, relationships),
|
||||
store_vector_chunks(
|
||||
ctx.db,
|
||||
ctx.services,
|
||||
ctx.task_id.as_str(),
|
||||
&content,
|
||||
chunk_range,
|
||||
&chunks,
|
||||
&ctx.pipeline_config.tuning
|
||||
)
|
||||
)?;
|
||||
|
||||
ctx.db.store_item(content).await?;
|
||||
ctx.db.store_item(text_content).await?;
|
||||
ctx.db.rebuild_indexes().await?;
|
||||
|
||||
debug!(
|
||||
@@ -252,17 +239,14 @@ async fn store_graph_entities(
|
||||
|
||||
async fn store_vector_chunks(
|
||||
db: &SurrealDbClient,
|
||||
services: &dyn PipelineServices,
|
||||
task_id: &str,
|
||||
content: &TextContent,
|
||||
chunk_range: std::ops::Range<usize>,
|
||||
chunks: &[TextChunk],
|
||||
tuning: &super::config::IngestionTuning,
|
||||
) -> Result<usize, AppError> {
|
||||
let prepared_chunks = services.prepare_chunks(content, chunk_range).await?;
|
||||
let chunk_count = prepared_chunks.len();
|
||||
let chunk_count = chunks.len();
|
||||
|
||||
let batch_size = tuning.chunk_insert_concurrency.max(1);
|
||||
for chunk in &prepared_chunks {
|
||||
for chunk in chunks {
|
||||
debug!(
|
||||
task_id = %task_id,
|
||||
chunk_id = %chunk.id,
|
||||
@@ -271,7 +255,7 @@ async fn store_vector_chunks(
|
||||
);
|
||||
}
|
||||
|
||||
for batch in prepared_chunks.chunks(batch_size) {
|
||||
for batch in chunks.chunks(batch_size) {
|
||||
store_chunk_batch(db, batch, tuning).await?;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user