chore: refactor retrieval pipeline to chunk-first RRF with derived entities and slimmer eval surface.

Collapse the multi-strategy entity engine into one benchmarked chunk retrieval path, derive entities from retrieved chunks, update consumers and docs, and apply clippy fixes across the workspace.
This commit is contained in:
Per Stark
2026-05-30 22:19:08 +02:00
parent a8e30192ba
commit e9d8654324
38 changed files with 1049 additions and 2614 deletions
+63 -115
View File
@@ -1,10 +1,9 @@
pub mod answer_retrieval;
pub mod answer_retrieval_helper;
pub mod graph;
pub mod pipeline;
pub mod reranking;
pub mod scoring;
pub(crate) mod scoring;
use common::{
error::AppError,
@@ -16,39 +15,28 @@ use common::{
use reranking::RerankerLease;
use tracing::instrument;
// Strategy output variants - defined before pipeline module
/// Result of a retrieval run.
///
/// Chunk retrieval is always performed; entities are only present when the caller
/// requested entity resolution via [`RetrievalConfig::with_entities`].
#[derive(Debug)]
pub enum StrategyOutput {
Entities(Vec<RetrievedEntity>),
pub enum RetrievalOutput {
Chunks(Vec<RetrievedChunk>),
Search(SearchResult),
}
/// Unified search result containing both chunks and entities
#[derive(Debug, Clone)]
pub struct SearchResult {
pub chunks: Vec<RetrievedChunk>,
pub entities: Vec<RetrievedEntity>,
}
impl SearchResult {
pub fn new(chunks: Vec<RetrievedChunk>, entities: Vec<RetrievedEntity>) -> Self {
Self { chunks, entities }
}
pub fn is_empty(&self) -> bool {
self.chunks.is_empty() && self.entities.is_empty()
}
WithEntities {
chunks: Vec<RetrievedChunk>,
entities: Vec<RetrievedEntity>,
},
}
pub use pipeline::{
retrieved_entities_to_json, Diagnostics, StageTimings, RetrievalConfig,
RetrievalStrategy, RetrievalTuning, RetrievalTuningFlags, SearchTarget,
retrieved_entities_to_json, Diagnostics, RetrievalConfig, RetrievalParams, StageKind,
StageTimings,
};
// Backward-compatible type aliases for external consumers
pub type PipelineDiagnostics = Diagnostics;
pub type PipelineStageTimings = StageTimings;
/// Rounds `value` to three decimal places, widening it to `f64` for JSON output.
///
/// The score is converted to `f64` first, scaled by 1000, rounded to the
/// nearest integer, and scaled back down.
pub(crate) fn round_score(value: f32) -> f64 {
    let scaled = f64::from(value) * 1000.0;
    scaled.round() / 1000.0
}
// Captures a supporting chunk plus its fused retrieval score for downstream prompts.
#[derive(Debug, Clone)]
@@ -57,7 +45,7 @@ pub struct RetrievedChunk {
pub score: f32,
}
// Final entity representation returned to callers, enriched with ranked chunks.
// Knowledge entity resolved from retrieved chunks, enriched with its contributing chunks.
#[derive(Debug, Clone)]
pub struct RetrievedEntity {
pub entity: KnowledgeEntity,
@@ -65,9 +53,9 @@ pub struct RetrievedEntity {
pub chunks: Vec<RetrievedChunk>,
}
/// Primary orchestrator for the process of retrieving `KnowledgeEntity` values related to an `input_text`
/// Run chunk-first hybrid retrieval for `input_text`, optionally resolving owning entities.
#[instrument(skip_all, fields(user_id))]
pub async fn retrieve_entities(
pub async fn retrieve(
db_client: &SurrealDbClient,
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
embedding_provider: Option<&common::utils::embedding::EmbeddingProvider>,
@@ -75,8 +63,8 @@ pub async fn retrieve_entities(
user_id: &str,
config: RetrievalConfig,
reranker: Option<RerankerLease>,
) -> Result<StrategyOutput, AppError> {
let params = pipeline::StrategyParams {
) -> Result<RetrievalOutput, AppError> {
let params = pipeline::RetrievalParams {
db_client,
openai_client,
embedding_provider,
@@ -94,6 +82,7 @@ mod tests {
use anyhow::{self};
use async_openai::Client;
use common::storage::indexes::ensure_runtime;
use common::storage::types::knowledge_entity::{KnowledgeEntity, KnowledgeEntityType};
use common::storage::types::system_settings::SystemSettings;
use uuid::Uuid;
@@ -133,7 +122,7 @@ mod tests {
}
#[tokio::test]
async fn test_default_strategy_retrieves_chunks() -> anyhow::Result<()> {
async fn test_chunk_retrieval_returns_chunks() -> anyhow::Result<()> {
let db = setup_test_db().await?;
let user_id = "test_user";
let chunk = TextChunk::new(
@@ -145,7 +134,7 @@ mod tests {
TextChunk::store_with_embedding(chunk.clone(), chunk_embedding_primary(), &db).await?;
let openai_client = Client::new();
let params = pipeline::StrategyParams {
let params = pipeline::RetrievalParams {
db_client: &db,
openai_client: &openai_client,
embedding_provider: None,
@@ -154,12 +143,13 @@ mod tests {
config: RetrievalConfig::default(),
reranker: None,
};
let results = pipeline::run_pipeline_with_embedding(params, test_embedding())
.await?;
let results = pipeline::run_with_embedding(params, test_embedding()).await?;
let chunks = match results {
StrategyOutput::Chunks(items) => items,
other => anyhow::bail!("expected chunk results, got {other:?}"),
RetrievalOutput::Chunks(items) => items,
RetrievalOutput::WithEntities { .. } => {
anyhow::bail!("expected chunk results, got entities")
}
};
assert!(!chunks.is_empty(), "Expected at least one retrieval result");
@@ -171,8 +161,7 @@ mod tests {
}
#[tokio::test]
async fn test_default_strategy_returns_chunks_from_multiple_sources(
) -> anyhow::Result<()> {
async fn test_chunk_retrieval_returns_chunks_from_multiple_sources() -> anyhow::Result<()> {
let db = setup_test_db().await?;
let user_id = "multi_source_user";
@@ -191,7 +180,7 @@ mod tests {
TextChunk::store_with_embedding(secondary_chunk, chunk_embedding_secondary(), &db).await?;
let openai_client = Client::new();
let params = pipeline::StrategyParams {
let params = pipeline::RetrievalParams {
db_client: &db,
openai_client: &openai_client,
embedding_provider: None,
@@ -200,12 +189,13 @@ mod tests {
config: RetrievalConfig::default(),
reranker: None,
};
let results = pipeline::run_pipeline_with_embedding(params, test_embedding())
.await?;
let results = pipeline::run_with_embedding(params, test_embedding()).await?;
let chunks = match results {
StrategyOutput::Chunks(items) => items,
other => anyhow::bail!("expected chunk results, got {other:?}"),
RetrievalOutput::Chunks(items) => items,
RetrievalOutput::WithEntities { .. } => {
anyhow::bail!("expected chunk results, got entities")
}
};
assert!(chunks.len() >= 2, "Expected chunks from multiple sources");
@@ -223,96 +213,54 @@ mod tests {
}
#[tokio::test]
async fn test_revised_strategy_returns_chunks() -> anyhow::Result<()> {
async fn test_with_entities_resolves_owning_entities() -> anyhow::Result<()> {
let db = setup_test_db().await?;
let user_id = "chunk_user";
let chunk_one = TextChunk::new(
"src_alpha".into(),
"Tokio tasks execute on worker threads managed by the runtime.".into(),
user_id.into(),
);
let chunk_two = TextChunk::new(
"src_beta".into(),
"Hyper utilizes Tokio to drive HTTP state machines efficiently.".into(),
user_id.into(),
);
let user_id = "entity_user";
TextChunk::store_with_embedding(chunk_one.clone(), chunk_embedding_primary(), &db).await?;
TextChunk::store_with_embedding(chunk_two.clone(), chunk_embedding_secondary(), &db).await?;
let config = RetrievalConfig::with_strategy(RetrievalStrategy::Default);
let openai_client = Client::new();
let params = pipeline::StrategyParams {
db_client: &db,
openai_client: &openai_client,
embedding_provider: None,
input_text: "tokio runtime worker behavior",
user_id,
config,
reranker: None,
};
let results = pipeline::run_pipeline_with_embedding(params, test_embedding())
.await?;
let chunks = match results {
StrategyOutput::Chunks(items) => items,
other => anyhow::bail!("expected chunk results, got {other:?}"),
};
assert!(
!chunks.is_empty(),
"Revised strategy should return chunk-only responses"
);
assert!(
chunks
.iter()
.any(|entry| entry.chunk.chunk.contains("Tokio")),
"Chunk results should contain relevant snippets"
);
Ok(())
}
#[tokio::test]
async fn test_search_strategy_returns_search_result() -> anyhow::Result<()> {
let db = setup_test_db().await?;
let user_id = "search_user";
let chunk = TextChunk::new(
"search_src".into(),
"Async Rust programming uses Tokio runtime for concurrent tasks.".into(),
"entity_source".into(),
"Async Rust programming uses the Tokio runtime for concurrent tasks.".into(),
user_id.into(),
);
TextChunk::store_with_embedding(chunk.clone(), chunk_embedding_primary(), &db).await?;
let config = RetrievalConfig::for_search(pipeline::SearchTarget::Both);
let entity = KnowledgeEntity::new(
"entity_source".into(),
"Tokio Runtime".into(),
"Async runtime for Rust".into(),
KnowledgeEntityType::Document,
None,
user_id.into(),
);
db.store_item(entity).await?;
let openai_client = Client::new();
let params = pipeline::StrategyParams {
let params = pipeline::RetrievalParams {
db_client: &db,
openai_client: &openai_client,
embedding_provider: None,
input_text: "async rust programming",
user_id,
config,
config: RetrievalConfig::with_entities(),
reranker: None,
};
let results = pipeline::run_pipeline_with_embedding(params, test_embedding())
.await?;
let results = pipeline::run_with_embedding(params, test_embedding()).await?;
let StrategyOutput::Search(search_result) = results else {
anyhow::bail!("expected Search output");
let RetrievalOutput::WithEntities { chunks, entities } = results else {
anyhow::bail!("expected WithEntities output");
};
// Should return chunks (entities may be empty if none stored)
assert!(!chunks.is_empty(), "Should return chunks");
assert!(
!search_result.chunks.is_empty(),
"Search strategy should return chunks"
entities.iter().any(|e| e.entity.name == "Tokio Runtime"),
"Should resolve the entity owning the retrieved chunk"
);
assert!(
search_result
.chunks
entities
.iter()
.any(|c| c.chunk.chunk.contains("Tokio")),
"Search results should contain relevant chunks"
.find(|e| e.entity.name == "Tokio Runtime")
.is_some_and(|e| !e.chunks.is_empty()),
"Resolved entity should carry its contributing chunks"
);
Ok(())
}