retrieval-pipeline: v0

This commit is contained in:
Per Stark
2025-11-18 21:20:27 +01:00
parent 6b7befbd04
commit f535df7e61
32 changed files with 1189 additions and 453 deletions

View File

@@ -1,25 +0,0 @@
[package]
name = "composite-retrieval"
version = "0.1.0"
edition = "2021"
license = "AGPL-3.0-or-later"
[lints]
workspace = true
[dependencies]
tokio = { workspace = true }
serde = { workspace = true }
axum = { workspace = true }
tracing = { workspace = true }
anyhow = { workspace = true }
thiserror = { workspace = true }
serde_json = { workspace = true }
surrealdb = { workspace = true }
futures = { workspace = true }
async-openai = { workspace = true }
uuid = { workspace = true }
fastembed = { workspace = true }
common = { path = "../common", features = ["test-utils"] }
state-machines = { workspace = true }

View File

@@ -1,117 +0,0 @@
use async_openai::{
error::OpenAIError,
types::{
ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage,
CreateChatCompletionRequest, CreateChatCompletionRequestArgs, CreateChatCompletionResponse,
ResponseFormat, ResponseFormatJsonSchema,
},
};
use common::{
error::AppError,
storage::types::{
message::{format_history, Message},
system_settings::SystemSettings,
},
};
use serde::Deserialize;
use serde_json::Value;
use super::answer_retrieval_helper::get_query_response_schema;
/// A single source reference emitted by the LLM alongside its answer.
#[derive(Debug, Deserialize)]
pub struct Reference {
    // Deserialized to validate the schema; not read directly yet.
    #[allow(dead_code)]
    pub reference: String,
}
/// Shape of the JSON payload the LLM is constrained to produce
/// (see `get_query_response_schema`).
#[derive(Debug, Deserialize)]
pub struct LLMResponseFormat {
    pub answer: String,
    // Parsed for completeness; consumers currently use `answer` only.
    #[allow(dead_code)]
    pub references: Vec<Reference>,
}
/// Final answer surfaced to callers: answer text plus reference identifiers.
#[derive(Debug)]
pub struct Answer {
    pub content: String,
    pub references: Vec<String>,
}
/// Formats the retrieved context and the user's question into the single
/// prompt body sent as the chat request's user message.
pub fn create_user_message(entities_json: &Value, query: &str) -> String {
    format!(
        r"
Context Information:
==================
{}
User Question:
==================
{}
",
        entities_json, query
    )
}
/// Like `create_user_message`, but prepends the formatted chat history so the
/// LLM can resolve follow-up questions against prior turns.
pub fn create_user_message_with_history(
    entities_json: &Value,
    history: &[Message],
    query: &str,
) -> String {
    let history_text = format_history(history);
    format!(
        r"
Chat history:
==================
{history_text}
Context Information:
==================
{entities_json}
User Question:
==================
{query}
"
    )
}
/// Builds the chat completion request: the system prompt and model come from
/// `settings`, the user message is passed in, and the response is constrained
/// to the strict JSON schema from `get_query_response_schema`.
///
/// # Errors
///
/// Returns an `OpenAIError` if the request builder rejects the configuration.
pub fn create_chat_request(
    user_message: String,
    settings: &SystemSettings,
) -> Result<CreateChatCompletionRequest, OpenAIError> {
    let json_schema = ResponseFormatJsonSchema {
        description: Some("Query answering AI".into()),
        name: "query_answering_with_uuids".into(),
        schema: Some(get_query_response_schema()),
        strict: Some(true),
    };
    let system_message =
        ChatCompletionRequestSystemMessage::from(settings.query_system_prompt.clone());
    let user_message = ChatCompletionRequestUserMessage::from(user_message);
    CreateChatCompletionRequestArgs::default()
        .model(&settings.query_model)
        .messages([system_message.into(), user_message.into()])
        .response_format(ResponseFormat::JsonSchema { json_schema })
        .build()
}
/// Extracts the first choice's content from an LLM chat completion and parses
/// it into [`LLMResponseFormat`].
///
/// # Errors
///
/// Returns `AppError::LLMParsing` when the response carries no content or the
/// content is not valid JSON for the expected schema.
pub async fn process_llm_response(
    response: CreateChatCompletionResponse,
) -> Result<LLMResponseFormat, AppError> {
    let content = response
        .choices
        .first()
        .and_then(|choice| choice.message.content.as_ref())
        // `ok_or_else` defers building the error String until it is needed,
        // avoiding the allocation on the success path.
        .ok_or_else(|| AppError::LLMParsing("No content found in LLM response".into()))?;
    serde_json::from_str::<LLMResponseFormat>(content).map_err(|e| {
        AppError::LLMParsing(format!("Failed to parse LLM response into analysis: {e}"))
    })
}

View File

@@ -1,26 +0,0 @@
use common::storage::types::system_prompts::DEFAULT_QUERY_SYSTEM_PROMPT;
use serde_json::{json, Value};
/// System prompt for query answering; re-exported from the shared default.
pub static QUERY_SYSTEM_PROMPT: &str = DEFAULT_QUERY_SYSTEM_PROMPT;
/// Returns the strict JSON schema enforced on LLM query responses:
/// an `answer` string plus an array of `{ "reference": string }` objects.
/// `additionalProperties: false` keeps the model from emitting extra keys.
pub fn get_query_response_schema() -> Value {
    json!({
        "type": "object",
        "properties": {
            "answer": { "type": "string" },
            "references": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "reference": { "type": "string" },
                    },
                    "required": ["reference"],
                    "additionalProperties": false,
                }
            }
        },
        "required": ["answer", "references"],
        "additionalProperties": false
    })
}

View File

@@ -1,265 +0,0 @@
use std::collections::HashMap;
use serde::Deserialize;
use tracing::debug;
use common::{
error::AppError,
storage::{db::SurrealDbClient, types::StoredObject},
};
use crate::scoring::Scored;
use common::storage::types::file_info::deserialize_flexible_id;
use surrealdb::sql::Thing;
/// Row shape returned by the FTS scoring query: the record id plus its
/// combined score (NONE scores surface here as `None`).
#[derive(Debug, Deserialize)]
struct FtsScoreRow {
    // Normalized to a plain String; presumably the helper accepts both raw
    // strings and record ids — see `deserialize_flexible_id` for specifics.
    #[serde(deserialize_with = "deserialize_flexible_id")]
    id: String,
    fts_score: Option<f32>,
}
/// Executes a full-text search query against SurrealDB and returns scored results.
///
/// The function expects FTS indexes to exist for the provided table. Currently supports
/// `knowledge_entity` (name + description) and `text_chunk` (chunk).
///
/// # Errors
///
/// Returns `AppError::Validation` for an unsupported table, or propagates any
/// database/deserialization error from the two queries.
pub async fn find_items_by_fts<T>(
    take: usize,
    query: &str,
    db_client: &SurrealDbClient,
    table: &str,
    user_id: &str,
) -> Result<Vec<Scored<T>>, AppError>
where
    T: for<'de> serde::Deserialize<'de> + StoredObject,
{
    // Each supported table pairs a match clause with a score expression that
    // sums per-field scores, treating missing (NONE) scores as 0.
    let (filter_clause, score_clause) = match table {
        "knowledge_entity" => (
            "(name @0@ $terms OR description @1@ $terms)",
            "(IF search::score(0) != NONE THEN search::score(0) ELSE 0 END) + \
             (IF search::score(1) != NONE THEN search::score(1) ELSE 0 END)",
        ),
        "text_chunk" => (
            "(chunk @0@ $terms)",
            "IF search::score(0) != NONE THEN search::score(0) ELSE 0 END",
        ),
        _ => {
            return Err(AppError::Validation(format!(
                "FTS not configured for table '{table}'"
            )))
        }
    };
    // Inline format captures; the previous `name = name` arguments were redundant.
    let sql = format!(
        "SELECT id, {score_clause} AS fts_score \
         FROM {table} \
         WHERE {filter_clause} \
         AND user_id = $user_id \
         ORDER BY fts_score DESC \
         LIMIT $limit"
    );
    debug!(
        table = table,
        limit = take,
        "Executing FTS query with filter clause: {}",
        filter_clause
    );
    let mut response = db_client
        .query(sql)
        .bind(("terms", query.to_owned()))
        .bind(("user_id", user_id.to_owned()))
        .bind(("limit", take as i64))
        .await?;
    let score_rows: Vec<FtsScoreRow> = response.take(0)?;
    if score_rows.is_empty() {
        return Ok(Vec::new());
    }
    // Fetch the full records in one round trip; build the Things directly from
    // the score rows instead of going through an intermediate Vec<String>.
    let thing_ids: Vec<Thing> = score_rows
        .iter()
        .map(|row| Thing::from((table, row.id.as_str())))
        .collect();
    let mut items_response = db_client
        .query("SELECT * FROM type::table($table) WHERE id IN $things AND user_id = $user_id")
        .bind(("table", table.to_owned()))
        .bind(("things", thing_ids))
        .bind(("user_id", user_id.to_owned()))
        .await?;
    let items: Vec<T> = items_response.take(0)?;
    let mut item_map: HashMap<String, T> = items
        .into_iter()
        .map(|item| (item.get_id().to_owned(), item))
        .collect();
    // Re-attach scores while preserving the ranking order of the first query.
    let mut results = Vec::with_capacity(score_rows.len());
    for row in score_rows {
        if let Some(item) = item_map.remove(&row.id) {
            let score = row.fts_score.unwrap_or_default();
            results.push(Scored::new(item).with_fts_score(score));
        }
    }
    Ok(results)
}
#[cfg(test)]
mod tests {
    use super::*;
    use common::storage::types::{
        knowledge_entity::{KnowledgeEntity, KnowledgeEntityType},
        text_chunk::TextChunk,
        StoredObject,
    };
    use uuid::Uuid;
    // Zero-filled 1536-dim embedding — presumably matches the dimension the
    // migrations define; values are irrelevant to the FTS path under test.
    fn dummy_embedding() -> Vec<f32> {
        vec![0.0; 1536]
    }
    #[tokio::test]
    async fn fts_preserves_single_field_score_for_name() {
        let namespace = "fts_test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("failed to create in-memory surreal");
        db.apply_migrations()
            .await
            .expect("failed to apply migrations");
        let user_id = "user_fts";
        // Only the name matches the search term; the description is unrelated,
        // so exactly one of the two per-field scores should contribute.
        let entity = KnowledgeEntity::new(
            "source_a".into(),
            "Rustacean handbook".into(),
            "completely unrelated description".into(),
            KnowledgeEntityType::Document,
            None,
            dummy_embedding(),
            user_id.into(),
        );
        db.store_item(entity.clone())
            .await
            .expect("failed to insert entity");
        // Rebuild so the FTS index reflects the freshly stored row.
        db.rebuild_indexes()
            .await
            .expect("failed to rebuild indexes");
        let results = find_items_by_fts::<KnowledgeEntity>(
            5,
            "rustacean",
            &db,
            KnowledgeEntity::table_name(),
            user_id,
        )
        .await
        .expect("fts query failed");
        assert!(!results.is_empty(), "expected at least one FTS result");
        assert!(
            results[0].scores.fts.is_some(),
            "expected an FTS score when only the name matched"
        );
    }
    #[tokio::test]
    async fn fts_preserves_single_field_score_for_description() {
        let namespace = "fts_test_ns_desc";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("failed to create in-memory surreal");
        db.apply_migrations()
            .await
            .expect("failed to apply migrations");
        let user_id = "user_fts_desc";
        // Mirror of the name test: only the description matches the term.
        let entity = KnowledgeEntity::new(
            "source_b".into(),
            "neutral name".into(),
            "Detailed notes about async runtimes".into(),
            KnowledgeEntityType::Document,
            None,
            dummy_embedding(),
            user_id.into(),
        );
        db.store_item(entity.clone())
            .await
            .expect("failed to insert entity");
        db.rebuild_indexes()
            .await
            .expect("failed to rebuild indexes");
        let results = find_items_by_fts::<KnowledgeEntity>(
            5,
            "async",
            &db,
            KnowledgeEntity::table_name(),
            user_id,
        )
        .await
        .expect("fts query failed");
        assert!(!results.is_empty(), "expected at least one FTS result");
        assert!(
            results[0].scores.fts.is_some(),
            "expected an FTS score when only the description matched"
        );
    }
    #[tokio::test]
    async fn fts_preserves_scores_for_text_chunks() {
        let namespace = "fts_test_ns_chunks";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("failed to create in-memory surreal");
        db.apply_migrations()
            .await
            .expect("failed to apply migrations");
        let user_id = "user_fts_chunk";
        let chunk = TextChunk::new(
            "source_chunk".into(),
            "GraphQL documentation reference".into(),
            dummy_embedding(),
            user_id.into(),
        );
        db.store_item(chunk.clone())
            .await
            .expect("failed to insert chunk");
        db.rebuild_indexes()
            .await
            .expect("failed to rebuild indexes");
        let results =
            find_items_by_fts::<TextChunk>(5, "graphql", &db, TextChunk::table_name(), user_id)
                .await
                .expect("fts query failed");
        assert!(!results.is_empty(), "expected at least one FTS result");
        assert!(
            results[0].scores.fts.is_some(),
            "expected an FTS score when chunk field matched"
        );
    }
}

View File

@@ -1,432 +0,0 @@
use std::collections::{HashMap, HashSet};
use surrealdb::{sql::Thing, Error};
use common::storage::{
db::SurrealDbClient,
types::{
knowledge_entity::KnowledgeEntity, knowledge_relationship::KnowledgeRelationship,
StoredObject,
},
};
/// Retrieves database entries whose `source_id` matches any of the given ids.
///
/// This function queries the database for all records in a specified table, owned
/// by `user_id`, whose `source_id` field is contained in `source_ids`. It's
/// commonly used to find related entities or track the origin of database entries.
///
/// # Arguments
///
/// * `source_ids` - The source identifiers to search for in the database
/// * `table_name` - The name of the table to search in
/// * `user_id` - Owner scope; only this user's rows are returned
/// * `db` - The `SurrealDB` client instance for database operations
///
/// # Type Parameters
///
/// * `T` - The type to deserialize the query results into. Must implement `serde::Deserialize`
///
/// # Returns
///
/// Returns a `Result` containing either:
/// * `Ok(Vec<T>)` - A vector of matching records deserialized into type `T`
/// * `Err(Error)` - An error if the database query fails
///
/// # Errors
///
/// This function will return an `Error` if:
/// * The database query fails to execute
/// * The results cannot be deserialized into type `T`
pub async fn find_entities_by_source_ids<T>(
    source_ids: Vec<String>,
    table_name: &str,
    user_id: &str,
    db: &SurrealDbClient,
) -> Result<Vec<T>, Error>
where
    T: for<'de> serde::Deserialize<'de>,
{
    let query =
        "SELECT * FROM type::table($table) WHERE source_id IN $source_ids AND user_id = $user_id";
    db.query(query)
        .bind(("table", table_name.to_owned()))
        .bind(("source_ids", source_ids))
        .bind(("user_id", user_id.to_owned()))
        .await?
        .take(0)
}
/// Find entities connected to `entity_id` via `relates_to` edges, in either
/// direction, returning at most `limit` neighbors (`limit == 0` means no cap).
///
/// Neighbor order follows edge discovery order; duplicates are removed and the
/// entity itself is excluded.
///
/// # Errors
///
/// Returns an `Error` if either database query fails or rows cannot be
/// deserialized.
pub async fn find_entities_by_relationship_by_id(
    db: &SurrealDbClient,
    entity_id: &str,
    user_id: &str,
    limit: usize,
) -> Result<Vec<KnowledgeEntity>, Error> {
    // Fetch every edge touching the entity, scoped to the owning user.
    let mut relationships_response = db
        .query(
            "
        SELECT * FROM relates_to
        WHERE metadata.user_id = $user_id
        AND (in = type::thing('knowledge_entity', $entity_id)
        OR out = type::thing('knowledge_entity', $entity_id))
        ",
        )
        .bind(("entity_id", entity_id.to_owned()))
        .bind(("user_id", user_id.to_owned()))
        .await?;
    let relationships: Vec<KnowledgeRelationship> = relationships_response.take(0)?;
    if relationships.is_empty() {
        return Ok(Vec::new());
    }
    // Collect the id on the far side of each edge, deduplicating while
    // preserving first-seen order.
    // NOTE(review): these comparisons assume `rel.in_`/`rel.out` hold bare ids
    // matching `entity_id` (not `table:id` Things) — confirm against the
    // KnowledgeRelationship deserialization; otherwise only the final `else`
    // arm (keep both endpoints) ever runs.
    let mut neighbor_ids: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    for rel in relationships {
        if rel.in_ == entity_id {
            if seen.insert(rel.out.clone()) {
                neighbor_ids.push(rel.out);
            }
        } else if rel.out == entity_id {
            if seen.insert(rel.in_.clone()) {
                neighbor_ids.push(rel.in_);
            }
        } else {
            if seen.insert(rel.in_.clone()) {
                neighbor_ids.push(rel.in_.clone());
            }
            if seen.insert(rel.out.clone()) {
                neighbor_ids.push(rel.out);
            }
        }
    }
    // Drop any self-reference that slipped through the else arm above.
    neighbor_ids.retain(|id| id != entity_id);
    if neighbor_ids.is_empty() {
        return Ok(Vec::new());
    }
    // Apply the cap before the fetch so we only load what we can return.
    if limit > 0 && neighbor_ids.len() > limit {
        neighbor_ids.truncate(limit);
    }
    let thing_ids: Vec<Thing> = neighbor_ids
        .iter()
        .map(|id| Thing::from((KnowledgeEntity::table_name(), id.as_str())))
        .collect();
    let mut neighbors_response = db
        .query("SELECT * FROM type::table($table) WHERE id IN $things AND user_id = $user_id")
        .bind(("table", KnowledgeEntity::table_name().to_owned()))
        .bind(("things", thing_ids))
        .bind(("user_id", user_id.to_owned()))
        .await?;
    let neighbors: Vec<KnowledgeEntity> = neighbors_response.take(0)?;
    if neighbors.is_empty() {
        return Ok(Vec::new());
    }
    // The SELECT does not preserve input order, so restore the discovery order
    // recorded in `neighbor_ids`.
    let mut neighbor_map: HashMap<String, KnowledgeEntity> = neighbors
        .into_iter()
        .map(|entity| (entity.id.clone(), entity))
        .collect();
    let mut ordered = Vec::new();
    for id in neighbor_ids {
        if let Some(entity) = neighbor_map.remove(&id) {
            ordered.push(entity);
        }
        if limit > 0 && ordered.len() >= limit {
            break;
        }
    }
    Ok(ordered)
}
#[cfg(test)]
mod tests {
    use super::*;
    use common::storage::types::knowledge_entity::{KnowledgeEntity, KnowledgeEntityType};
    use common::storage::types::knowledge_relationship::KnowledgeRelationship;
    use common::storage::types::StoredObject;
    use uuid::Uuid;
    #[tokio::test]
    async fn test_find_entities_by_source_ids() {
        // Setup in-memory database for testing
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Create some test entities with different source_ids
        let source_id1 = "source123".to_string();
        let source_id2 = "source456".to_string();
        let source_id3 = "source789".to_string();
        let entity_type = KnowledgeEntityType::Document;
        let embedding = vec![0.1, 0.2, 0.3];
        let user_id = "user123".to_string();
        // Entity with source_id1
        let entity1 = KnowledgeEntity::new(
            source_id1.clone(),
            "Entity 1".to_string(),
            "Description 1".to_string(),
            entity_type.clone(),
            None,
            embedding.clone(),
            user_id.clone(),
        );
        // Entity with source_id2
        let entity2 = KnowledgeEntity::new(
            source_id2.clone(),
            "Entity 2".to_string(),
            "Description 2".to_string(),
            entity_type.clone(),
            None,
            embedding.clone(),
            user_id.clone(),
        );
        // Another entity with source_id1
        let entity3 = KnowledgeEntity::new(
            source_id1.clone(),
            "Entity 3".to_string(),
            "Description 3".to_string(),
            entity_type.clone(),
            None,
            embedding.clone(),
            user_id.clone(),
        );
        // Entity with source_id3
        let entity4 = KnowledgeEntity::new(
            source_id3.clone(),
            "Entity 4".to_string(),
            "Description 4".to_string(),
            entity_type.clone(),
            None,
            embedding.clone(),
            user_id.clone(),
        );
        // Store all entities
        db.store_item(entity1.clone())
            .await
            .expect("Failed to store entity 1");
        db.store_item(entity2.clone())
            .await
            .expect("Failed to store entity 2");
        db.store_item(entity3.clone())
            .await
            .expect("Failed to store entity 3");
        db.store_item(entity4.clone())
            .await
            .expect("Failed to store entity 4");
        // Test finding entities by multiple source_ids
        let source_ids = vec![source_id1.clone(), source_id2.clone()];
        let found_entities: Vec<KnowledgeEntity> =
            find_entities_by_source_ids(source_ids, KnowledgeEntity::table_name(), &user_id, &db)
                .await
                .expect("Failed to find entities by source_ids");
        // Should find 3 entities (2 with source_id1, 1 with source_id2)
        assert_eq!(
            found_entities.len(),
            3,
            "Should find 3 entities with the specified source_ids"
        );
        // Check that entities with source_id1 and source_id2 are found
        let found_source_ids: Vec<String> =
            found_entities.iter().map(|e| e.source_id.clone()).collect();
        assert!(
            found_source_ids.contains(&source_id1),
            "Should find entities with source_id1"
        );
        assert!(
            found_source_ids.contains(&source_id2),
            "Should find entities with source_id2"
        );
        assert!(
            !found_source_ids.contains(&source_id3),
            "Should not find entities with source_id3"
        );
        // Test finding entities by a single source_id
        let single_source_id = vec![source_id1.clone()];
        let found_entities: Vec<KnowledgeEntity> = find_entities_by_source_ids(
            single_source_id,
            KnowledgeEntity::table_name(),
            &user_id,
            &db,
        )
        .await
        .expect("Failed to find entities by single source_id");
        // Should find 2 entities with source_id1
        assert_eq!(
            found_entities.len(),
            2,
            "Should find 2 entities with source_id1"
        );
        // Check that all found entities have source_id1
        for entity in found_entities {
            assert_eq!(
                entity.source_id, source_id1,
                "All found entities should have source_id1"
            );
        }
        // Test finding entities with non-existent source_id
        let non_existent_source_id = vec!["non_existent_source".to_string()];
        let found_entities: Vec<KnowledgeEntity> = find_entities_by_source_ids(
            non_existent_source_id,
            KnowledgeEntity::table_name(),
            &user_id,
            &db,
        )
        .await
        .expect("Failed to find entities by non-existent source_id");
        // Should find 0 entities
        assert_eq!(
            found_entities.len(),
            0,
            "Should find 0 entities with non-existent source_id"
        );
    }
    #[tokio::test]
    async fn test_find_entities_by_relationship_by_id() {
        // Setup in-memory database for testing
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Create some test entities
        let entity_type = KnowledgeEntityType::Document;
        let embedding = vec![0.1, 0.2, 0.3];
        let user_id = "user123".to_string();
        // Create the central entity we'll query relationships for
        let central_entity = KnowledgeEntity::new(
            "central_source".to_string(),
            "Central Entity".to_string(),
            "Central Description".to_string(),
            entity_type.clone(),
            None,
            embedding.clone(),
            user_id.clone(),
        );
        // Create related entities
        let related_entity1 = KnowledgeEntity::new(
            "related_source1".to_string(),
            "Related Entity 1".to_string(),
            "Related Description 1".to_string(),
            entity_type.clone(),
            None,
            embedding.clone(),
            user_id.clone(),
        );
        let related_entity2 = KnowledgeEntity::new(
            "related_source2".to_string(),
            "Related Entity 2".to_string(),
            "Related Description 2".to_string(),
            entity_type.clone(),
            None,
            embedding.clone(),
            user_id.clone(),
        );
        // Create an unrelated entity
        let unrelated_entity = KnowledgeEntity::new(
            "unrelated_source".to_string(),
            "Unrelated Entity".to_string(),
            "Unrelated Description".to_string(),
            entity_type.clone(),
            None,
            embedding.clone(),
            user_id.clone(),
        );
        // Store all entities, keeping the stored copies so we use the ids the
        // database actually assigned.
        let central_entity = db
            .store_item(central_entity.clone())
            .await
            .expect("Failed to store central entity")
            .unwrap();
        let related_entity1 = db
            .store_item(related_entity1.clone())
            .await
            .expect("Failed to store related entity 1")
            .unwrap();
        let related_entity2 = db
            .store_item(related_entity2.clone())
            .await
            .expect("Failed to store related entity 2")
            .unwrap();
        let _unrelated_entity = db
            .store_item(unrelated_entity.clone())
            .await
            .expect("Failed to store unrelated entity")
            .unwrap();
        // Create relationships
        let source_id = "relationship_source".to_string();
        // Create relationship 1: central -> related1
        let relationship1 = KnowledgeRelationship::new(
            central_entity.id.clone(),
            related_entity1.id.clone(),
            user_id.clone(),
            source_id.clone(),
            "references".to_string(),
        );
        // Create relationship 2: central -> related2
        let relationship2 = KnowledgeRelationship::new(
            central_entity.id.clone(),
            related_entity2.id.clone(),
            user_id.clone(),
            source_id.clone(),
            "contains".to_string(),
        );
        // Store relationships
        relationship1
            .store_relationship(&db)
            .await
            .expect("Failed to store relationship 1");
        relationship2
            .store_relationship(&db)
            .await
            .expect("Failed to store relationship 2");
        // Test finding entities related to the central entity (no limit)
        let related_entities =
            find_entities_by_relationship_by_id(&db, &central_entity.id, &user_id, usize::MAX)
                .await
                .expect("Failed to find entities by relationship");
        // Check that we found relationships
        assert!(
            related_entities.len() >= 2,
            "Should find related entities in both directions"
        );
    }
}

View File

@@ -1,267 +0,0 @@
pub mod answer_retrieval;
pub mod answer_retrieval_helper;
pub mod fts;
pub mod graph;
pub mod pipeline;
pub mod reranking;
pub mod scoring;
pub mod vector;
use common::{
error::AppError,
storage::{
db::SurrealDbClient,
types::{knowledge_entity::KnowledgeEntity, text_chunk::TextChunk},
},
};
use reranking::RerankerLease;
use tracing::instrument;
pub use pipeline::{retrieved_entities_to_json, RetrievalConfig, RetrievalTuning};
/// Captures a supporting chunk plus its fused retrieval score for downstream prompts.
#[derive(Debug, Clone)]
pub struct RetrievedChunk {
    pub chunk: TextChunk,
    pub score: f32,
}
/// Final entity representation returned to callers, enriched with ranked chunks.
#[derive(Debug, Clone)]
pub struct RetrievedEntity {
    pub entity: KnowledgeEntity,
    // Fused score for the entity itself; per-chunk scores live on `chunks`.
    pub score: f32,
    pub chunks: Vec<RetrievedChunk>,
}
/// Primary orchestrator for retrieving `KnowledgeEntity` records related to an
/// `input_text`, running the full pipeline with the default configuration.
///
/// # Errors
///
/// Propagates any `AppError` raised by the underlying pipeline stages.
#[instrument(skip_all, fields(user_id))]
pub async fn retrieve_entities(
    db_client: &SurrealDbClient,
    openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
    input_text: &str,
    user_id: &str,
    reranker: Option<RerankerLease>,
) -> Result<Vec<RetrievedEntity>, AppError> {
    pipeline::run_pipeline(
        db_client,
        openai_client,
        input_text,
        user_id,
        RetrievalConfig::default(),
        reranker,
    )
    .await
}
#[cfg(test)]
mod tests {
    use super::*;
    use async_openai::Client;
    use common::storage::types::{
        knowledge_entity::{KnowledgeEntity, KnowledgeEntityType},
        knowledge_relationship::KnowledgeRelationship,
        text_chunk::TextChunk,
    };
    use pipeline::RetrievalConfig;
    use uuid::Uuid;
    // Tiny 3-dimensional embeddings keep the test HNSW indexes small; the
    // query embedding is closest to `entity_embedding_high` / the primary chunk.
    fn test_embedding() -> Vec<f32> {
        vec![0.9, 0.1, 0.0]
    }
    fn entity_embedding_high() -> Vec<f32> {
        vec![0.8, 0.2, 0.0]
    }
    fn entity_embedding_low() -> Vec<f32> {
        vec![0.1, 0.9, 0.0]
    }
    fn chunk_embedding_primary() -> Vec<f32> {
        vec![0.85, 0.15, 0.0]
    }
    fn chunk_embedding_secondary() -> Vec<f32> {
        vec![0.2, 0.8, 0.0]
    }
    // Spins up an in-memory SurrealDB, applies migrations, then re-defines the
    // embedding indexes with DIMENSION 3 to match the test vectors above.
    async fn setup_test_db() -> SurrealDbClient {
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");
        db.query(
            "BEGIN TRANSACTION;
            REMOVE INDEX IF EXISTS idx_embedding_chunks ON TABLE text_chunk;
            DEFINE INDEX idx_embedding_chunks ON TABLE text_chunk FIELDS embedding HNSW DIMENSION 3;
            REMOVE INDEX IF EXISTS idx_embedding_entities ON TABLE knowledge_entity;
            DEFINE INDEX idx_embedding_entities ON TABLE knowledge_entity FIELDS embedding HNSW DIMENSION 3;
            COMMIT TRANSACTION;",
        )
        .await
        .expect("Failed to configure indices");
        db
    }
    #[tokio::test]
    async fn test_retrieve_entities_with_embedding_basic_flow() {
        let db = setup_test_db().await;
        let user_id = "test_user";
        let entity = KnowledgeEntity::new(
            "source_1".into(),
            "Rust async guide".into(),
            "Detailed notes about async runtimes".into(),
            KnowledgeEntityType::Document,
            None,
            entity_embedding_high(),
            user_id.into(),
        );
        let chunk = TextChunk::new(
            entity.source_id.clone(),
            "Tokio uses cooperative scheduling for fairness.".into(),
            chunk_embedding_primary(),
            user_id.into(),
        );
        db.store_item(entity.clone())
            .await
            .expect("Failed to store entity");
        db.store_item(chunk.clone())
            .await
            .expect("Failed to store chunk");
        // Precomputed embedding path avoids any live OpenAI call.
        let openai_client = Client::new();
        let results = pipeline::run_pipeline_with_embedding(
            &db,
            &openai_client,
            test_embedding(),
            "Rust concurrency async tasks",
            user_id,
            RetrievalConfig::default(),
            None,
        )
        .await
        .expect("Hybrid retrieval failed");
        assert!(
            !results.is_empty(),
            "Expected at least one retrieval result"
        );
        let top = &results[0];
        assert!(
            top.entity.name.contains("Rust"),
            "Expected Rust entity to be ranked first"
        );
        assert!(
            !top.chunks.is_empty(),
            "Expected Rust entity to include supporting chunks"
        );
    }
    #[tokio::test]
    async fn test_graph_relationship_enriches_results() {
        let db = setup_test_db().await;
        let user_id = "graph_user";
        // The neighbor has a low vector score on its own; it should surface via
        // the graph edge from the primary entity.
        let primary = KnowledgeEntity::new(
            "primary_source".into(),
            "Async Rust patterns".into(),
            "Explores async runtimes and scheduling strategies.".into(),
            KnowledgeEntityType::Document,
            None,
            entity_embedding_high(),
            user_id.into(),
        );
        let neighbor = KnowledgeEntity::new(
            "neighbor_source".into(),
            "Tokio Scheduler Deep Dive".into(),
            "Details on Tokio's cooperative scheduler.".into(),
            KnowledgeEntityType::Document,
            None,
            entity_embedding_low(),
            user_id.into(),
        );
        db.store_item(primary.clone())
            .await
            .expect("Failed to store primary entity");
        db.store_item(neighbor.clone())
            .await
            .expect("Failed to store neighbor entity");
        let primary_chunk = TextChunk::new(
            primary.source_id.clone(),
            "Rust async tasks use Tokio's cooperative scheduler.".into(),
            chunk_embedding_primary(),
            user_id.into(),
        );
        let neighbor_chunk = TextChunk::new(
            neighbor.source_id.clone(),
            "Tokio's scheduler manages task fairness across executors.".into(),
            chunk_embedding_secondary(),
            user_id.into(),
        );
        db.store_item(primary_chunk)
            .await
            .expect("Failed to store primary chunk");
        db.store_item(neighbor_chunk)
            .await
            .expect("Failed to store neighbor chunk");
        let openai_client = Client::new();
        let relationship = KnowledgeRelationship::new(
            primary.id.clone(),
            neighbor.id.clone(),
            user_id.into(),
            "relationship_source".into(),
            "references".into(),
        );
        relationship
            .store_relationship(&db)
            .await
            .expect("Failed to store relationship");
        let results = pipeline::run_pipeline_with_embedding(
            &db,
            &openai_client,
            test_embedding(),
            "Rust concurrency async tasks",
            user_id,
            RetrievalConfig::default(),
            None,
        )
        .await
        .expect("Hybrid retrieval failed");
        let mut neighbor_entry = None;
        for entity in &results {
            if entity.entity.id == neighbor.id {
                neighbor_entry = Some(entity.clone());
            }
        }
        let neighbor_entry =
            neighbor_entry.expect("Graph-enriched neighbor should appear in results");
        assert!(
            neighbor_entry.score > 0.2,
            "Graph-enriched entity should have a meaningful fused score"
        );
        assert!(
            neighbor_entry
                .chunks
                .iter()
                .all(|chunk| chunk.chunk.source_id == neighbor.source_id),
            "Neighbor entity should surface its own supporting chunks"
        );
    }
}

View File

@@ -1,69 +0,0 @@
use serde::{Deserialize, Serialize};
/// Tunable parameters that govern each retrieval stage.
///
/// Roughly grouped: candidate pool sizes (`*_take`), result filtering
/// (`score_threshold`, `fallback_min_results`), prompt budgeting
/// (`token_budget_estimate`, `avg_chars_per_token`, `max_chunks_per_entity`),
/// graph expansion (`graph_*`), and reranking (`rerank_*`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetrievalTuning {
    pub entity_vector_take: usize,
    pub chunk_vector_take: usize,
    pub entity_fts_take: usize,
    pub chunk_fts_take: usize,
    pub score_threshold: f32,
    pub fallback_min_results: usize,
    pub token_budget_estimate: usize,
    pub avg_chars_per_token: usize,
    pub max_chunks_per_entity: usize,
    pub lexical_match_weight: f32,
    pub graph_traversal_seed_limit: usize,
    pub graph_neighbor_limit: usize,
    pub graph_score_decay: f32,
    pub graph_seed_min_score: f32,
    pub graph_vector_inheritance: f32,
    pub rerank_blend_weight: f32,
    pub rerank_scores_only: bool,
    pub rerank_keep_top: usize,
}
impl Default for RetrievalTuning {
    /// Baseline tuning used when callers do not supply overrides.
    fn default() -> Self {
        Self {
            entity_vector_take: 15,
            chunk_vector_take: 20,
            entity_fts_take: 10,
            chunk_fts_take: 20,
            score_threshold: 0.35,
            fallback_min_results: 10,
            token_budget_estimate: 10000,
            avg_chars_per_token: 4,
            max_chunks_per_entity: 4,
            lexical_match_weight: 0.15,
            graph_traversal_seed_limit: 5,
            graph_neighbor_limit: 6,
            graph_score_decay: 0.75,
            graph_seed_min_score: 0.4,
            graph_vector_inheritance: 0.6,
            rerank_blend_weight: 0.65,
            rerank_scores_only: false,
            rerank_keep_top: 8,
        }
    }
}
/// Wrapper containing tuning plus future flags for per-request overrides.
#[derive(Debug, Clone)]
pub struct RetrievalConfig {
pub tuning: RetrievalTuning,
}
impl RetrievalConfig {
pub fn new(tuning: RetrievalTuning) -> Self {
Self { tuning }
}
}
impl Default for RetrievalConfig {
fn default() -> Self {
Self {
tuning: RetrievalTuning::default(),
}
}
}

View File

@@ -1,51 +0,0 @@
use serde::Serialize;
/// Captures instrumentation for each hybrid retrieval stage when diagnostics are enabled.
///
/// Each field is `None` until its stage has run and recorded stats.
#[derive(Debug, Clone, Default, Serialize)]
pub struct PipelineDiagnostics {
    pub collect_candidates: Option<CollectCandidatesStats>,
    pub enrich_chunks_from_entities: Option<ChunkEnrichmentStats>,
    pub assemble: Option<AssembleStats>,
}
/// Candidate counts and raw chunk scores gathered during the collection stage.
#[derive(Debug, Clone, Default, Serialize)]
pub struct CollectCandidatesStats {
    pub vector_entity_candidates: usize,
    pub vector_chunk_candidates: usize,
    pub fts_entity_candidates: usize,
    pub fts_chunk_candidates: usize,
    pub vector_chunk_scores: Vec<f32>,
    pub fts_chunk_scores: Vec<f32>,
}
/// Before/after counts for the chunk-enrichment stage.
#[derive(Debug, Clone, Default, Serialize)]
pub struct ChunkEnrichmentStats {
    pub filtered_entity_count: usize,
    pub fallback_min_results: usize,
    pub chunk_sources_considered: usize,
    pub chunk_candidates_before_enrichment: usize,
    pub chunk_candidates_after_enrichment: usize,
    pub top_chunk_scores: Vec<f32>,
}
/// Token-budget accounting and per-entity traces from final assembly.
#[derive(Debug, Clone, Default, Serialize)]
pub struct AssembleStats {
    pub token_budget_start: usize,
    pub token_budget_spent: usize,
    pub token_budget_remaining: usize,
    pub budget_exhausted: bool,
    pub chunks_selected: usize,
    pub chunks_skipped_due_budget: usize,
    pub entity_count: usize,
    pub entity_traces: Vec<EntityAssemblyTrace>,
}
/// Per-entity record of which chunks assembly kept or dropped.
#[derive(Debug, Clone, Default, Serialize)]
pub struct EntityAssemblyTrace {
    pub entity_id: String,
    pub source_id: String,
    pub inspected_candidates: usize,
    pub selected_chunk_ids: Vec<String>,
    pub selected_chunk_scores: Vec<f32>,
    pub skipped_due_budget: usize,
}

View File

@@ -1,212 +0,0 @@
mod config;
mod diagnostics;
mod stages;
mod state;
pub use config::{RetrievalConfig, RetrievalTuning};
pub use diagnostics::{
AssembleStats, ChunkEnrichmentStats, CollectCandidatesStats, EntityAssemblyTrace,
PipelineDiagnostics,
};
use crate::{reranking::RerankerLease, RetrievedEntity};
use async_openai::Client;
use common::{error::AppError, storage::db::SurrealDbClient};
use tracing::info;
/// Bundle returned by the metrics/diagnostics pipeline entry points: ranked
/// results, optional stage diagnostics, and per-stage timings.
#[derive(Debug)]
pub struct PipelineRunOutput {
    pub results: Vec<RetrievedEntity>,
    // Populated only when the pipeline runs with diagnostics enabled.
    pub diagnostics: Option<PipelineDiagnostics>,
    pub stage_timings: PipelineStageTimings,
}
/// Cumulative wall-clock time spent in each pipeline stage, in milliseconds.
///
/// The `record_*` helpers accumulate, so repeated invocations of a stage add
/// to its total rather than overwrite it.
#[derive(Debug, Clone, Default, serde::Serialize)]
pub struct PipelineStageTimings {
    pub collect_candidates_ms: u128,
    pub graph_expansion_ms: u128,
    pub chunk_attach_ms: u128,
    pub rerank_ms: u128,
    pub assemble_ms: u128,
}
impl PipelineStageTimings {
    // `Duration::as_millis` already returns u128, so the previous
    // `as u128` casts were redundant and have been removed.
    fn record_collect_candidates(&mut self, duration: std::time::Duration) {
        self.collect_candidates_ms += duration.as_millis();
    }
    fn record_graph_expansion(&mut self, duration: std::time::Duration) {
        self.graph_expansion_ms += duration.as_millis();
    }
    fn record_chunk_attach(&mut self, duration: std::time::Duration) {
        self.chunk_attach_ms += duration.as_millis();
    }
    fn record_rerank(&mut self, duration: std::time::Duration) {
        self.rerank_ms += duration.as_millis();
    }
    fn record_assemble(&mut self, duration: std::time::Duration) {
        self.assemble_ms += duration.as_millis();
    }
}
/// Drives the retrieval pipeline from embedding through final assembly.
///
/// The query embedding is computed internally from `input_text`; use
/// `run_pipeline_with_embedding` when one is already available.
///
/// # Errors
///
/// Propagates any `AppError` from the pipeline stages.
pub async fn run_pipeline(
    db_client: &SurrealDbClient,
    openai_client: &Client<async_openai::config::OpenAIConfig>,
    input_text: &str,
    user_id: &str,
    config: RetrievalConfig,
    reranker: Option<RerankerLease>,
) -> Result<Vec<RetrievedEntity>, AppError> {
    // Log a bounded, newline-free preview of the input (first 120 chars).
    let input_chars = input_text.chars().count();
    let input_preview: String = input_text.chars().take(120).collect();
    let input_preview_clean = input_preview.replace('\n', " ");
    let preview_len = input_preview_clean.chars().count();
    // NOTE(review): message says "ingestion" but this is the generic retrieval
    // entry point — confirm the intended wording before changing the log text.
    info!(
        %user_id,
        input_chars,
        preview_truncated = input_chars > preview_len,
        preview = %input_preview_clean,
        "Starting ingestion retrieval pipeline"
    );
    let ctx = stages::PipelineContext::new(
        db_client,
        openai_client,
        input_text.to_owned(),
        user_id.to_owned(),
        config,
        reranker,
    );
    // `false` disables per-stage diagnostics collection.
    let outcome = run_pipeline_internal(ctx, false).await?;
    Ok(outcome.results)
}
/// Runs the retrieval pipeline using a precomputed query embedding and
/// returns only the ranked results (no diagnostics, no timings).
pub async fn run_pipeline_with_embedding(
    db_client: &SurrealDbClient,
    openai_client: &Client<async_openai::config::OpenAIConfig>,
    query_embedding: Vec<f32>,
    input_text: &str,
    user_id: &str,
    config: RetrievalConfig,
    reranker: Option<RerankerLease>,
) -> Result<Vec<RetrievedEntity>, AppError> {
    let outcome = run_pipeline_internal(
        stages::PipelineContext::with_embedding(
            db_client,
            openai_client,
            query_embedding,
            input_text.to_owned(),
            user_id.to_owned(),
            config,
            reranker,
        ),
        false,
    )
    .await?;
    Ok(outcome.results)
}
/// Runs the pipeline with a precomputed embedding and returns stage metrics.
///
/// Diagnostics capture stays disabled; stage timings are always recorded.
pub async fn run_pipeline_with_embedding_with_metrics(
    db_client: &SurrealDbClient,
    openai_client: &Client<async_openai::config::OpenAIConfig>,
    query_embedding: Vec<f32>,
    input_text: &str,
    user_id: &str,
    config: RetrievalConfig,
    reranker: Option<RerankerLease>,
) -> Result<PipelineRunOutput, AppError> {
    run_pipeline_internal(
        stages::PipelineContext::with_embedding(
            db_client,
            openai_client,
            query_embedding,
            input_text.to_owned(),
            user_id.to_owned(),
            config,
            reranker,
        ),
        false,
    )
    .await
}
/// Runs the pipeline with a precomputed embedding and diagnostics capture
/// enabled, returning the full `PipelineRunOutput`.
pub async fn run_pipeline_with_embedding_with_diagnostics(
    db_client: &SurrealDbClient,
    openai_client: &Client<async_openai::config::OpenAIConfig>,
    query_embedding: Vec<f32>,
    input_text: &str,
    user_id: &str,
    config: RetrievalConfig,
    reranker: Option<RerankerLease>,
) -> Result<PipelineRunOutput, AppError> {
    run_pipeline_internal(
        stages::PipelineContext::with_embedding(
            db_client,
            openai_client,
            query_embedding,
            input_text.to_owned(),
            user_id.to_owned(),
            config,
            reranker,
        ),
        true,
    )
    .await
}
/// Helper exposed for tests to convert retrieved entities into downstream prompt JSON.
///
/// Each entity becomes a `{"KnowledgeEntity": {...}}` object with its scored
/// chunks inlined; scores are rounded to three decimals.
pub fn retrieved_entities_to_json(entities: &[RetrievedEntity]) -> serde_json::Value {
    let payload: Vec<serde_json::Value> = entities
        .iter()
        .map(|entry| {
            let chunks: Vec<serde_json::Value> = entry
                .chunks
                .iter()
                .map(|chunk| {
                    serde_json::json!({
                        "score": round_score(chunk.score),
                        "content": chunk.chunk.chunk
                    })
                })
                .collect();
            serde_json::json!({
                "KnowledgeEntity": {
                    "id": entry.entity.id,
                    "name": entry.entity.name,
                    "description": entry.entity.description,
                    "score": round_score(entry.score),
                    "chunks": chunks
                }
            })
        })
        .collect();
    serde_json::Value::Array(payload)
}
/// Shared driver: optionally enables diagnostics, runs the stages, then
/// packages results, diagnostics, and timings into one output value.
async fn run_pipeline_internal(
    mut ctx: stages::PipelineContext<'_>,
    capture_diagnostics: bool,
) -> Result<PipelineRunOutput, AppError> {
    // Diagnostics must be switched on before any stage executes.
    if capture_diagnostics {
        ctx.enable_diagnostics();
    }
    let results = drive_pipeline(&mut ctx).await?;
    let stage_timings = ctx.take_stage_timings();
    let diagnostics = ctx.take_diagnostics();
    Ok(PipelineRunOutput {
        results,
        diagnostics,
        stage_timings,
    })
}
/// Threads the typestate machine through every stage in its fixed order:
/// embed -> collect -> expand -> attach -> rerank -> assemble.
async fn drive_pipeline(
    ctx: &mut stages::PipelineContext<'_>,
) -> Result<Vec<RetrievedEntity>, AppError> {
    let ready = state::ready();
    let embedded = stages::embed(ready, ctx).await?;
    let with_candidates = stages::collect_candidates(embedded, ctx).await?;
    let expanded = stages::expand_graph(with_candidates, ctx).await?;
    let with_chunks = stages::attach_chunks(expanded, ctx).await?;
    let reranked = stages::rerank(with_chunks, ctx).await?;
    stages::assemble(reranked, ctx)
}
/// Rounds a similarity score to three decimal places for JSON output.
fn round_score(value: f32) -> f64 {
    let scaled = f64::from(value) * 1000.0;
    scaled.round() / 1000.0
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,27 +0,0 @@
use state_machines::state_machine;
state_machine! {
    // Typestate machine enforcing the retrieval stage order at compile time:
    // Ready -> Embedded -> CandidatesLoaded -> GraphExpanded ->
    // ChunksAttached -> Reranked -> Completed.
    name: HybridRetrievalMachine,
    state: HybridRetrievalState,
    initial: Ready,
    states: [Ready, Embedded, CandidatesLoaded, GraphExpanded, ChunksAttached, Reranked, Completed, Failed],
    events {
        embed { transition: { from: Ready, to: Embedded } }
        collect_candidates { transition: { from: Embedded, to: CandidatesLoaded } }
        expand_graph { transition: { from: CandidatesLoaded, to: GraphExpanded } }
        attach_chunks { transition: { from: GraphExpanded, to: ChunksAttached } }
        rerank { transition: { from: ChunksAttached, to: Reranked } }
        assemble { transition: { from: Reranked, to: Completed } }
        // abort reaches Failed from every mid-pipeline state.
        // NOTE(review): `Embedded` is not listed as an abort source —
        // presumably intentional; confirm a failure right after embedding
        // never needs to transition to Failed.
        abort {
            transition: { from: Ready, to: Failed }
            transition: { from: CandidatesLoaded, to: Failed }
            transition: { from: GraphExpanded, to: Failed }
            transition: { from: ChunksAttached, to: Failed }
            transition: { from: Reranked, to: Failed }
        }
    }
}
/// Creates a fresh retrieval machine in the `Ready` state with unit context.
pub fn ready() -> HybridRetrievalMachine<(), Ready> {
    HybridRetrievalMachine::new(())
}

View File

@@ -1,170 +0,0 @@
use std::{
env, fs,
path::{Path, PathBuf},
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
thread::available_parallelism,
};
use common::{error::AppError, utils::config::AppConfig};
use fastembed::{RerankInitOptions, RerankResult, TextRerank};
use tokio::sync::{Mutex, OwnedSemaphorePermit, Semaphore};
use tracing::debug;
// Monotonically increasing ticket counter backing round-robin engine choice.
static NEXT_ENGINE: AtomicUsize = AtomicUsize::new(0);

/// Returns the next engine slot in round-robin order.
///
/// Relies on modulo wrap-around; `pool_len` must be non-zero (guaranteed by
/// pool construction, which rejects a zero size).
fn pick_engine_index(pool_len: usize) -> usize {
    NEXT_ENGINE.fetch_add(1, Ordering::Relaxed) % pool_len
}
/// Fixed-size pool of fastembed rerank engines guarded by a semaphore that
/// bounds how many reranks may run concurrently.
pub struct RerankerPool {
    /// One independently lockable engine per pool slot.
    engines: Vec<Arc<Mutex<TextRerank>>>,
    /// Permits equal the pool size; acquiring one reserves rerank capacity.
    semaphore: Arc<Semaphore>,
}
impl RerankerPool {
    /// Builds the pool at startup with default init options.
    /// `pool_size` bounds how many reranks may run in parallel.
    pub fn new(pool_size: usize) -> Result<Arc<Self>, AppError> {
        Self::new_with_options(pool_size, RerankInitOptions::default())
    }

    /// Builds `pool_size` engines with the given init options.
    fn new_with_options(
        pool_size: usize,
        init_options: RerankInitOptions,
    ) -> Result<Arc<Self>, AppError> {
        if pool_size == 0 {
            return Err(AppError::Validation(
                "RERANKING_POOL_SIZE must be greater than zero".to_string(),
            ));
        }
        fs::create_dir_all(&init_options.cache_dir)?;
        // Construct every engine up front; any failure aborts pool creation.
        let engines = (0..pool_size)
            .map(|x| {
                debug!("Creating reranking engine: {x}");
                TextRerank::try_new(init_options.clone())
                    .map(|model| Arc::new(Mutex::new(model)))
                    .map_err(|e| AppError::InternalError(e.to_string()))
            })
            .collect::<Result<Vec<_>, _>>()?;
        Ok(Arc::new(Self {
            semaphore: Arc::new(Semaphore::new(pool_size)),
            engines,
        }))
    }

    /// Initializes a pool from application configuration.
    /// Returns `Ok(None)` when reranking is disabled.
    pub fn maybe_from_config(config: &AppConfig) -> Result<Option<Arc<Self>>, AppError> {
        if !config.reranking_enabled {
            return Ok(None);
        }
        let size = config.reranking_pool_size.unwrap_or_else(default_pool_size);
        Self::new_with_options(size, build_rerank_init_options(config)?).map(Some)
    }

    /// Reserves rerank capacity and hands out a lease on one engine.
    pub async fn checkout(self: &Arc<Self>) -> RerankerLease {
        // The owned permit enforces backpressure; it is released when the
        // returned lease drops.
        let permit = self
            .semaphore
            .clone()
            .acquire_owned()
            .await
            .expect("semaphore closed");
        // Simple round-robin selection via an atomic counter so we do not
        // always hit index 0.
        let slot = pick_engine_index(self.engines.len());
        RerankerLease {
            _permit: permit,
            engine: Arc::clone(&self.engines[slot]),
        }
    }
}
/// Default rerank pool size: the machine's available parallelism capped at
/// 2, falling back to 2 when parallelism cannot be determined.
///
/// `available_parallelism` returns a `NonZeroUsize`, so `get()` is at least
/// 1 and `min(2)` keeps the result in 1..=2; the previous trailing `.max(1)`
/// was a no-op and has been removed.
fn default_pool_size() -> usize {
    available_parallelism()
        .map(|value| value.get().min(2))
        .unwrap_or(2)
}
/// Interprets common affirmative strings ("1", "true", "yes", "on") as
/// `true`, ignoring case and surrounding whitespace.
fn is_truthy(value: &str) -> bool {
    let normalized = value.trim().to_ascii_lowercase();
    ["1", "true", "yes", "on"].contains(&normalized.as_str())
}
/// Assembles fastembed reranker init options from config with env fallbacks.
///
/// Precedence for each setting (first match wins): `AppConfig` field, then
/// the `RERANKING_*` env var, then the `FASTEMBED_*` env var, then a default.
fn build_rerank_init_options(config: &AppConfig) -> Result<RerankInitOptions, AppError> {
    let mut options = RerankInitOptions::default();
    // Model cache directory; defaults to <data_dir>/fastembed/reranker.
    let cache_dir = config
        .fastembed_cache_dir
        .as_ref()
        .map(PathBuf::from)
        .or_else(|| env::var("RERANKING_CACHE_DIR").ok().map(PathBuf::from))
        .or_else(|| env::var("FASTEMBED_CACHE_DIR").ok().map(PathBuf::from))
        .unwrap_or_else(|| {
            Path::new(&config.data_dir)
                .join("fastembed")
                .join("reranker")
        });
    // Ensure the directory exists before fastembed tries to use it.
    fs::create_dir_all(&cache_dir)?;
    options.cache_dir = cache_dir;
    // Whether to show model-download progress; defaults to true.
    let show_progress = config
        .fastembed_show_download_progress
        .or_else(|| env_bool("RERANKING_SHOW_DOWNLOAD_PROGRESS"))
        .or_else(|| env_bool("FASTEMBED_SHOW_DOWNLOAD_PROGRESS"))
        .unwrap_or(true);
    options.show_download_progress = show_progress;
    // Max sequence length: only overridden when config or env provides a
    // parseable value; otherwise fastembed's default stands.
    if let Some(max_length) = config.fastembed_max_length.or_else(|| {
        env::var("RERANKING_MAX_LENGTH")
            .ok()
            .and_then(|value| value.parse().ok())
    }) {
        options.max_length = max_length;
    }
    Ok(options)
}
/// Reads `key` from the environment and interprets it as a boolean flag.
/// Returns `None` when the variable is unset or not valid unicode.
fn env_bool(key: &str) -> Option<bool> {
    match env::var(key) {
        Ok(raw) => Some(is_truthy(&raw)),
        Err(_) => None,
    }
}
/// Active lease on a single TextRerank instance.
pub struct RerankerLease {
    /// Held for the lease's lifetime; dropping it returns the permit to the
    /// pool's semaphore and frees rerank capacity.
    _permit: OwnedSemaphorePermit,
    /// The engine reserved for this lease.
    engine: Arc<Mutex<TextRerank>>,
}
impl RerankerLease {
    /// Reranks `documents` against `query` on this lease's reserved engine.
    ///
    /// Locks the engine to obtain `&mut TextRerank`, then delegates to
    /// fastembed; failures are surfaced as `AppError::InternalError`.
    pub async fn rerank(
        &self,
        query: &str,
        documents: Vec<String>,
    ) -> Result<Vec<RerankResult>, AppError> {
        let mut engine = self.engine.lock().await;
        let outcome = engine.rerank(query.to_owned(), documents, false, None);
        outcome.map_err(|err| AppError::InternalError(err.to_string()))
    }
}

View File

@@ -1,183 +0,0 @@
use std::cmp::Ordering;
use common::storage::types::StoredObject;
/// Holds optional subscores gathered from different retrieval signals.
/// `None` means the corresponding signal did not match the item.
#[derive(Debug, Clone, Copy, Default)]
pub struct Scores {
    /// Full-text-search subscore.
    pub fts: Option<f32>,
    /// Vector-similarity subscore.
    pub vector: Option<f32>,
    /// Graph-expansion subscore.
    pub graph: Option<f32>,
}
/// Generic wrapper combining an item with its accumulated retrieval scores.
#[derive(Debug, Clone)]
pub struct Scored<T> {
    /// The retrieved item itself.
    pub item: T,
    /// Per-signal subscores collected so far.
    pub scores: Scores,
    /// Combined score produced by fusion; 0.0 until fusion runs.
    pub fused: f32,
}
impl<T> Scored<T> {
    /// Wraps `item` with empty subscores and a fused score of 0.0.
    pub fn new(item: T) -> Self {
        Self {
            item,
            scores: Scores::default(),
            fused: 0.0,
        }
    }
    /// Builder-style setter for the vector-similarity subscore.
    pub const fn with_vector_score(mut self, score: f32) -> Self {
        self.scores.vector = Some(score);
        self
    }
    /// Builder-style setter for the full-text-search subscore.
    pub const fn with_fts_score(mut self, score: f32) -> Self {
        self.scores.fts = Some(score);
        self
    }
    /// Builder-style setter for the graph-expansion subscore.
    pub const fn with_graph_score(mut self, score: f32) -> Self {
        self.scores.graph = Some(score);
        self
    }
    /// Stores the result of score fusion in place.
    pub const fn update_fused(&mut self, fused: f32) {
        self.fused = fused;
    }
}
/// Weights used for linear score fusion.
#[derive(Debug, Clone, Copy)]
pub struct FusionWeights {
    /// Weight applied to the vector-similarity subscore.
    pub vector: f32,
    /// Weight applied to the full-text-search subscore.
    pub fts: f32,
    /// Weight applied to the graph-expansion subscore.
    pub graph: f32,
    /// Flat bonus added when two or more signals are present.
    pub multi_bonus: f32,
}
impl Default for FusionWeights {
    /// Default weighting: vector dominates (0.5), then FTS (0.3), then graph
    /// (0.2) — the three sum to 1.0 — plus a small 0.02 multi-signal bonus.
    fn default() -> Self {
        Self {
            vector: 0.5,
            fts: 0.3,
            graph: 0.2,
            multi_bonus: 0.02,
        }
    }
}
/// Restricts `value` to the inclusive range [0.0, 1.0].
/// NaN passes through unchanged, matching `f32::clamp` semantics.
pub const fn clamp_unit(value: f32) -> f32 {
    if value < 0.0 {
        0.0
    } else if value > 1.0 {
        1.0
    } else {
        value
    }
}
/// Maps a distance in [0, inf) onto a similarity in (0, 1], where distance 0
/// yields 1.0. Non-finite distances collapse to 0.0; negative distances are
/// treated as 0.
pub fn distance_to_similarity(distance: f32) -> f32 {
    if !distance.is_finite() {
        return 0.0;
    }
    let non_negative = distance.max(0.0);
    clamp_unit((1.0 + non_negative).recip())
}
/// Min-max normalizes `scores` into [0.0, 1.0].
///
/// Non-finite entries are excluded from the min/max bounds and map to 0.0.
/// When no finite value exists, every entry maps to 0.0. When all finite
/// values are equal, every entry maps to 1.0 (preserving the original
/// behavior, including for non-finite entries in that case).
///
/// Fix: the old guard `!min.is_finite() || !max.is_finite()` was unreachable
/// because the sentinels `f32::MAX` / `f32::MIN` are themselves finite; the
/// "no finite values" case is now tracked explicitly with an `Option`.
pub fn min_max_normalize(scores: &[f32]) -> Vec<f32> {
    if scores.is_empty() {
        return Vec::new();
    }
    // Track finite bounds; `None` means no finite value has been seen yet.
    let mut bounds: Option<(f32, f32)> = None;
    for &s in scores {
        if s.is_finite() {
            bounds = Some(match bounds {
                Some((lo, hi)) => (lo.min(s), hi.max(s)),
                None => (s, s),
            });
        }
    }
    let (min, max) = match bounds {
        Some(pair) => pair,
        // No finite input at all: everything normalizes to 0.0.
        None => return vec![0.0; scores.len()],
    };
    if (max - min).abs() < f32::EPSILON {
        // All finite values equal: original behavior is all-1.0.
        return vec![1.0; scores.len()];
    }
    let span = max - min;
    scores
        .iter()
        .map(|score| {
            if score.is_finite() {
                ((score - min) / span).clamp(0.0, 1.0)
            } else {
                0.0
            }
        })
        .collect()
}
/// Linearly fuses the available subscores using `weights`, adding a flat
/// bonus when two or more signals contributed, then clamps to [0, 1].
/// Missing subscores contribute 0.
pub fn fuse_scores(scores: &Scores, weights: FusionWeights) -> f32 {
    let vector_score = scores.vector.unwrap_or(0.0);
    let fts_score = scores.fts.unwrap_or(0.0);
    let graph_score = scores.graph.unwrap_or(0.0);
    // The nested mul_add structure is kept exactly as before so rounding is
    // bit-identical.
    let mut combined = graph_score.mul_add(
        weights.graph,
        vector_score.mul_add(weights.vector, fts_score * weights.fts),
    );
    let active_signals = [
        scores.vector.is_some(),
        scores.fts.is_some(),
        scores.graph.is_some(),
    ]
    .iter()
    .filter(|present| **present)
    .count();
    if active_signals >= 2 {
        combined += weights.multi_bonus;
    }
    clamp_unit(combined)
}
/// Merges `incoming` scored items into `target`, keyed by item id.
///
/// For an id already present, only the subscores that `incoming` provides
/// overwrite the existing entry; its `item` and `fused` value are left
/// untouched. New ids are inserted as-is.
///
/// Fix: uses an entry-API `match` so the vacant arm can move `scored` into
/// the map; the previous `and_modify`/`or_insert_with` pair borrowed
/// `scored` in both closures and therefore forced a needless `item.clone()`.
pub fn merge_scored_by_id<T>(
    target: &mut std::collections::HashMap<String, Scored<T>>,
    incoming: Vec<Scored<T>>,
) where
    T: StoredObject + Clone,
{
    use std::collections::hash_map::Entry;

    for scored in incoming {
        let id = scored.item.get_id().to_owned();
        match target.entry(id) {
            Entry::Occupied(mut occupied) => {
                let existing = occupied.get_mut();
                if let Some(score) = scored.scores.vector {
                    existing.scores.vector = Some(score);
                }
                if let Some(score) = scored.scores.fts {
                    existing.scores.fts = Some(score);
                }
                if let Some(score) = scored.scores.graph {
                    existing.scores.graph = Some(score);
                }
            }
            Entry::Vacant(vacant) => {
                vacant.insert(scored);
            }
        }
    }
}
/// Sorts descending by fused score; ties (including incomparable NaN pairs)
/// fall back to ascending id order so the result is deterministic.
pub fn sort_by_fused_desc<T>(items: &mut [Scored<T>])
where
    T: StoredObject,
{
    items.sort_by(|a, b| match b.fused.partial_cmp(&a.fused) {
        Some(Ordering::Equal) | None => a.item.get_id().cmp(b.item.get_id()),
        Some(order) => order,
    });
}

View File

@@ -1,158 +0,0 @@
use std::collections::HashMap;
use common::storage::types::file_info::deserialize_flexible_id;
use common::{
error::AppError,
storage::{db::SurrealDbClient, types::StoredObject},
utils::embedding::generate_embedding,
};
use serde::Deserialize;
use surrealdb::sql::Thing;
use crate::scoring::{clamp_unit, distance_to_similarity, Scored};
/// Compares vectors and retrieves a number of items from the specified table.
///
/// This function generates embeddings for the input text, constructs a query to find the closest matches in the database,
/// and then deserializes the results into the specified type `T`.
///
/// # Arguments
///
/// * `take` - The number of items to retrieve from the database.
/// * `input_text` - The text to generate embeddings for.
/// * `db_client` - The SurrealDB client to use for querying the database.
/// * `table` - The table to query in the database.
/// * `openai_client` - The OpenAI client to use for generating embeddings.
/// * `user_id` - The user id of the current user.
///
/// # Returns
///
/// A vector of type `T` containing the closest matches to the input text, each wrapped in
/// `Scored` with its vector-similarity score. Returns an `AppError` if an error occurs.
///
/// # Type Parameters
///
/// * `T` - The type to deserialize the query results into. Must implement `serde::Deserialize`.
pub async fn find_items_by_vector_similarity<T>(
    take: usize,
    input_text: &str,
    db_client: &SurrealDbClient,
    table: &str,
    openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
    user_id: &str,
) -> Result<Vec<Scored<T>>, AppError>
where
    T: for<'de> serde::Deserialize<'de> + StoredObject,
{
    // Generate embeddings, then delegate to the embedding-based variant.
    let input_embedding = generate_embedding(openai_client, input_text, db_client).await?;
    find_items_by_vector_similarity_with_embedding(take, input_embedding, db_client, table, user_id)
        .await
}
/// Row shape returned by the KNN query: the record id and its distance.
#[derive(Debug, Deserialize)]
struct DistanceRow {
    // Record id, normalized to a plain string by `deserialize_flexible_id`.
    #[serde(deserialize_with = "deserialize_flexible_id")]
    id: String,
    // KNN distance; `None` when the engine reports no distance for the row.
    distance: Option<f32>,
}
/// Retrieves up to `take` items from `table` that are nearest to
/// `query_embedding`, scoped to `user_id`, each wrapped in `Scored` with a
/// vector-similarity score in [0, 1].
///
/// Two queries are issued: a KNN query that returns ids with distances, then
/// a fetch of the full records by id. Distances are converted to similarity
/// by min-max normalization across the result set (inverted so smaller
/// distance means higher similarity); when the distances do not span a
/// usable range, `distance_to_similarity` is used per row instead.
///
/// # Errors
///
/// Returns an `AppError` when embedding serialization or either database
/// query fails.
pub async fn find_items_by_vector_similarity_with_embedding<T>(
    take: usize,
    query_embedding: Vec<f32>,
    db_client: &SurrealDbClient,
    table: &str,
    user_id: &str,
) -> Result<Vec<Scored<T>>, AppError>
where
    T: for<'de> serde::Deserialize<'de> + StoredObject,
{
    let embedding_literal = serde_json::to_string(&query_embedding)
        .map_err(|err| AppError::InternalError(format!("Failed to serialize embedding: {err}")))?;
    // NOTE(review): the embedding is interpolated into the query text as a
    // JSON array literal rather than bound as a parameter. Injection risk is
    // nil (floats only), but confirm the KNN operator cannot take a bound
    // parameter here.
    let closest_query = format!(
        "SELECT id, vector::distance::knn() AS distance \
         FROM {table} \
         WHERE user_id = $user_id AND embedding <|{take},40|> {embedding} \
         LIMIT $limit",
        table = table,
        take = take,
        embedding = embedding_literal
    );
    let mut response = db_client
        .query(closest_query)
        .bind(("user_id", user_id.to_owned()))
        .bind(("limit", take as i64))
        .await?;
    let distance_rows: Vec<DistanceRow> = response.take(0)?;
    if distance_rows.is_empty() {
        return Ok(Vec::new());
    }
    // Fetch the full records for the returned ids, re-checking ownership.
    let ids: Vec<String> = distance_rows.iter().map(|row| row.id.clone()).collect();
    let thing_ids: Vec<Thing> = ids
        .iter()
        .map(|id| Thing::from((table, id.as_str())))
        .collect();
    let mut items_response = db_client
        .query("SELECT * FROM type::table($table) WHERE id IN $things AND user_id = $user_id")
        .bind(("table", table.to_owned()))
        // NOTE(review): `thing_ids` is not used after this bind — the
        // `.clone()` looks redundant.
        .bind(("things", thing_ids.clone()))
        .bind(("user_id", user_id.to_owned()))
        .await?;
    let items: Vec<T> = items_response.take(0)?;
    // Index records by id so results can be emitted in KNN (distance) order.
    let mut item_map: HashMap<String, T> = items
        .into_iter()
        .map(|item| (item.get_id().to_owned(), item))
        .collect();
    // Scan finite distances for min/max bounds used in normalization.
    let mut min_distance = f32::MAX;
    let mut max_distance = f32::MIN;
    for row in &distance_rows {
        if let Some(distance) = row.distance {
            if distance.is_finite() {
                if distance < min_distance {
                    min_distance = distance;
                }
                if distance > max_distance {
                    max_distance = distance;
                }
            }
        }
    }
    // Normalize only when the observed distances span a meaningful range.
    let normalize = min_distance.is_finite()
        && max_distance.is_finite()
        && (max_distance - min_distance).abs() > f32::EPSILON;
    let mut scored = Vec::with_capacity(distance_rows.len());
    for row in distance_rows {
        // Rows whose record failed the ownership-scoped fetch are skipped.
        if let Some(item) = item_map.remove(&row.id) {
            let similarity = row
                .distance
                .map(|distance| {
                    if normalize {
                        let span = max_distance - min_distance;
                        if span.abs() < f32::EPSILON {
                            1.0
                        } else {
                            // Invert: smallest distance maps to 1.0.
                            let normalized = 1.0 - ((distance - min_distance) / span);
                            clamp_unit(normalized)
                        }
                    } else {
                        distance_to_similarity(distance)
                    }
                })
                // Missing distance scores 0.0.
                .unwrap_or_default();
            scored.push(Scored::new(item).with_vector_score(similarity));
        }
    }
    Ok(scored)
}