chore: refactor retrieval pipeline to chunk-first RRF with derived entities and slimmer eval surface.

Collapse the multi-strategy entity engine into one benchmarked chunk retrieval path, derive entities from retrieved chunks, update consumers and docs, and apply clippy fixes across the workspace.
2026-07-05 20:41:41 +02:00 · 2026-05-30 22:19:08 +02:00
parent a8e30192ba
commit e9d8654324
38 changed files with 1049 additions and 2614 deletions
@@ -1,10 +1,9 @@
 pub mod answer_retrieval;
-pub mod answer_retrieval_helper;

-pub mod graph;
 pub mod pipeline;
 pub mod reranking;
-pub mod scoring;
+
+pub(crate) mod scoring;

 use common::{
    error::AppError,
@@ -16,39 +15,28 @@ use common::{
 use reranking::RerankerLease;
 use tracing::instrument;

-// Strategy output variants - defined before pipeline module
+/// Result of a retrieval run.
+///
+/// Chunk retrieval is always performed; entities are only present when the caller
+/// requested entity resolution via [`RetrievalConfig::with_entities`].
 #[derive(Debug)]
-pub enum StrategyOutput {
-    Entities(Vec<RetrievedEntity>),
+pub enum RetrievalOutput {
    Chunks(Vec<RetrievedChunk>),
-    Search(SearchResult),
-}
-
-/// Unified search result containing both chunks and entities
-#[derive(Debug, Clone)]
-pub struct SearchResult {
-    pub chunks: Vec<RetrievedChunk>,
-    pub entities: Vec<RetrievedEntity>,
-}
-
-impl SearchResult {
-    pub fn new(chunks: Vec<RetrievedChunk>, entities: Vec<RetrievedEntity>) -> Self {
-        Self { chunks, entities }
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.chunks.is_empty() && self.entities.is_empty()
-    }
+    WithEntities {
+        chunks: Vec<RetrievedChunk>,
+        entities: Vec<RetrievedEntity>,
+    },
 }

 pub use pipeline::{
-    retrieved_entities_to_json, Diagnostics, StageTimings, RetrievalConfig,
-    RetrievalStrategy, RetrievalTuning, RetrievalTuningFlags, SearchTarget,
+    retrieved_entities_to_json, Diagnostics, RetrievalConfig, RetrievalParams, StageKind,
+    StageTimings,
 };

-// Backward-compatible type aliases for external consumers
-pub type PipelineDiagnostics = Diagnostics;
-pub type PipelineStageTimings = StageTimings;
+/// Round a score to three decimal places for JSON output.
+pub(crate) fn round_score(value: f32) -> f64 {
+    (f64::from(value) * 1000.0).round() / 1000.0
+}

 // Captures a supporting chunk plus its fused retrieval score for downstream prompts.
 #[derive(Debug, Clone)]
@@ -57,7 +45,7 @@ pub struct RetrievedChunk {
    pub score: f32,
 }

-// Final entity representation returned to callers, enriched with ranked chunks.
+// Knowledge entity resolved from retrieved chunks, enriched with its contributing chunks.
 #[derive(Debug, Clone)]
 pub struct RetrievedEntity {
    pub entity: KnowledgeEntity,
@@ -65,9 +53,9 @@ pub struct RetrievedEntity {
    pub chunks: Vec<RetrievedChunk>,
 }

-/// Primary orchestrator for the process of retrieving `KnowledgeEntity` values related to an `input_text`
+/// Run chunk-first hybrid retrieval for `input_text`, optionally resolving owning entities.
 #[instrument(skip_all, fields(user_id))]
-pub async fn retrieve_entities(
+pub async fn retrieve(
    db_client: &SurrealDbClient,
    openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
    embedding_provider: Option<&common::utils::embedding::EmbeddingProvider>,
@@ -75,8 +63,8 @@ pub async fn retrieve_entities(
    user_id: &str,
    config: RetrievalConfig,
    reranker: Option<RerankerLease>,
-) -> Result<StrategyOutput, AppError> {
-    let params = pipeline::StrategyParams {
+) -> Result<RetrievalOutput, AppError> {
+    let params = pipeline::RetrievalParams {
        db_client,
        openai_client,
        embedding_provider,
@@ -94,6 +82,7 @@ mod tests {
    use anyhow::{self};
    use async_openai::Client;
    use common::storage::indexes::ensure_runtime;
+    use common::storage::types::knowledge_entity::{KnowledgeEntity, KnowledgeEntityType};
    use common::storage::types::system_settings::SystemSettings;
    use uuid::Uuid;

@@ -133,7 +122,7 @@ mod tests {
    }

    #[tokio::test]
-    async fn test_default_strategy_retrieves_chunks() -> anyhow::Result<()> {
+    async fn test_chunk_retrieval_returns_chunks() -> anyhow::Result<()> {
        let db = setup_test_db().await?;
        let user_id = "test_user";
        let chunk = TextChunk::new(
@@ -145,7 +134,7 @@ mod tests {
        TextChunk::store_with_embedding(chunk.clone(), chunk_embedding_primary(), &db).await?;

        let openai_client = Client::new();
-        let params = pipeline::StrategyParams {
+        let params = pipeline::RetrievalParams {
            db_client: &db,
            openai_client: &openai_client,
            embedding_provider: None,
@@ -154,12 +143,13 @@ mod tests {
            config: RetrievalConfig::default(),
            reranker: None,
        };
-        let results = pipeline::run_pipeline_with_embedding(params, test_embedding())
-            .await?;
+        let results = pipeline::run_with_embedding(params, test_embedding()).await?;

        let chunks = match results {
-            StrategyOutput::Chunks(items) => items,
-            other => anyhow::bail!("expected chunk results, got {other:?}"),
+            RetrievalOutput::Chunks(items) => items,
+            RetrievalOutput::WithEntities { .. } => {
+                anyhow::bail!("expected chunk results, got entities")
+            }
        };

        assert!(!chunks.is_empty(), "Expected at least one retrieval result");
@@ -171,8 +161,7 @@ mod tests {
    }

    #[tokio::test]
-    async fn test_default_strategy_returns_chunks_from_multiple_sources(
-    ) -> anyhow::Result<()> {
+    async fn test_chunk_retrieval_returns_chunks_from_multiple_sources() -> anyhow::Result<()> {
        let db = setup_test_db().await?;
        let user_id = "multi_source_user";

@@ -191,7 +180,7 @@ mod tests {
        TextChunk::store_with_embedding(secondary_chunk, chunk_embedding_secondary(), &db).await?;

        let openai_client = Client::new();
-        let params = pipeline::StrategyParams {
+        let params = pipeline::RetrievalParams {
            db_client: &db,
            openai_client: &openai_client,
            embedding_provider: None,
@@ -200,12 +189,13 @@ mod tests {
            config: RetrievalConfig::default(),
            reranker: None,
        };
-        let results = pipeline::run_pipeline_with_embedding(params, test_embedding())
-            .await?;
+        let results = pipeline::run_with_embedding(params, test_embedding()).await?;

        let chunks = match results {
-            StrategyOutput::Chunks(items) => items,
-            other => anyhow::bail!("expected chunk results, got {other:?}"),
+            RetrievalOutput::Chunks(items) => items,
+            RetrievalOutput::WithEntities { .. } => {
+                anyhow::bail!("expected chunk results, got entities")
+            }
        };

        assert!(chunks.len() >= 2, "Expected chunks from multiple sources");
@@ -223,96 +213,54 @@ mod tests {
    }

    #[tokio::test]
-    async fn test_revised_strategy_returns_chunks() -> anyhow::Result<()> {
+    async fn test_with_entities_resolves_owning_entities() -> anyhow::Result<()> {
        let db = setup_test_db().await?;
-        let user_id = "chunk_user";
-        let chunk_one = TextChunk::new(
-            "src_alpha".into(),
-            "Tokio tasks execute on worker threads managed by the runtime.".into(),
-            user_id.into(),
-        );
-        let chunk_two = TextChunk::new(
-            "src_beta".into(),
-            "Hyper utilizes Tokio to drive HTTP state machines efficiently.".into(),
-            user_id.into(),
-        );
+        let user_id = "entity_user";

-        TextChunk::store_with_embedding(chunk_one.clone(), chunk_embedding_primary(), &db).await?;
-        TextChunk::store_with_embedding(chunk_two.clone(), chunk_embedding_secondary(), &db).await?;
-
-        let config = RetrievalConfig::with_strategy(RetrievalStrategy::Default);
-        let openai_client = Client::new();
-        let params = pipeline::StrategyParams {
-            db_client: &db,
-            openai_client: &openai_client,
-            embedding_provider: None,
-            input_text: "tokio runtime worker behavior",
-            user_id,
-            config,
-            reranker: None,
-        };
-        let results = pipeline::run_pipeline_with_embedding(params, test_embedding())
-            .await?;
-
-        let chunks = match results {
-            StrategyOutput::Chunks(items) => items,
-            other => anyhow::bail!("expected chunk results, got {other:?}"),
-        };
-
-        assert!(
-            !chunks.is_empty(),
-            "Revised strategy should return chunk-only responses"
-        );
-        assert!(
-            chunks
-                .iter()
-                .any(|entry| entry.chunk.chunk.contains("Tokio")),
-            "Chunk results should contain relevant snippets"
-        );
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn test_search_strategy_returns_search_result() -> anyhow::Result<()> {
-        let db = setup_test_db().await?;
-        let user_id = "search_user";
        let chunk = TextChunk::new(
-            "search_src".into(),
-            "Async Rust programming uses Tokio runtime for concurrent tasks.".into(),
+            "entity_source".into(),
+            "Async Rust programming uses the Tokio runtime for concurrent tasks.".into(),
            user_id.into(),
        );
-
        TextChunk::store_with_embedding(chunk.clone(), chunk_embedding_primary(), &db).await?;

-        let config = RetrievalConfig::for_search(pipeline::SearchTarget::Both);
+        let entity = KnowledgeEntity::new(
+            "entity_source".into(),
+            "Tokio Runtime".into(),
+            "Async runtime for Rust".into(),
+            KnowledgeEntityType::Document,
+            None,
+            user_id.into(),
+        );
+        db.store_item(entity).await?;
+
        let openai_client = Client::new();
-        let params = pipeline::StrategyParams {
+        let params = pipeline::RetrievalParams {
            db_client: &db,
            openai_client: &openai_client,
            embedding_provider: None,
            input_text: "async rust programming",
            user_id,
-            config,
+            config: RetrievalConfig::with_entities(),
            reranker: None,
        };
-        let results = pipeline::run_pipeline_with_embedding(params, test_embedding())
-            .await?;
+        let results = pipeline::run_with_embedding(params, test_embedding()).await?;

-        let StrategyOutput::Search(search_result) = results else {
-            anyhow::bail!("expected Search output");
+        let RetrievalOutput::WithEntities { chunks, entities } = results else {
+            anyhow::bail!("expected WithEntities output");
        };

-        // Should return chunks (entities may be empty if none stored)
+        assert!(!chunks.is_empty(), "Should return chunks");
        assert!(
-            !search_result.chunks.is_empty(),
-            "Search strategy should return chunks"
+            entities.iter().any(|e| e.entity.name == "Tokio Runtime"),
+            "Should resolve the entity owning the retrieved chunk"
        );
        assert!(
-            search_result
-                .chunks
+            entities
                .iter()
-                .any(|c| c.chunk.chunk.contains("Tokio")),
-            "Search results should contain relevant chunks"
+                .find(|e| e.entity.name == "Tokio Runtime")
+                .is_some_and(|e| !e.chunks.is_empty()),
+            "Resolved entity should carry its contributing chunks"
        );
        Ok(())
    }