feat: reranking with fastembed added

2026-07-21 12:08:38 +02:00 · 2025-10-27 13:05:10 +01:00
parent a0e9387c76
commit 72578296db
25 changed files with 1586 additions and 202 deletions
@@ -17,6 +17,9 @@ pub struct RetrievalTuning {
    pub graph_score_decay: f32,
    pub graph_seed_min_score: f32,
    pub graph_vector_inheritance: f32,
+    pub rerank_blend_weight: f32,
+    pub rerank_scores_only: bool,
+    pub rerank_keep_top: usize,
 }

 impl Default for RetrievalTuning {
@@ -36,6 +39,9 @@ impl Default for RetrievalTuning {
            graph_score_decay: 0.75,
            graph_seed_min_score: 0.4,
            graph_vector_inheritance: 0.6,
+            rerank_blend_weight: 0.65,
+            rerank_scores_only: false,
+            rerank_keep_top: 8,
        }
    }
 }
@@ -4,7 +4,7 @@ mod state;

 pub use config::{RetrievalConfig, RetrievalTuning};

-use crate::RetrievedEntity;
+use crate::{reranking::RerankerLease, RetrievedEntity};
 use async_openai::Client;
 use common::{error::AppError, storage::db::SurrealDbClient};
 use tracing::info;
@@ -16,6 +16,7 @@ pub async fn run_pipeline(
    input_text: &str,
    user_id: &str,
    config: RetrievalConfig,
+    reranker: Option<RerankerLease>,
 ) -> Result<Vec<RetrievedEntity>, AppError> {
    let machine = state::ready();
    let input_chars = input_text.chars().count();
@@ -35,11 +36,13 @@ pub async fn run_pipeline(
        input_text.to_owned(),
        user_id.to_owned(),
        config,
+        reranker,
    );
    let machine = stages::embed(machine, &mut ctx).await?;
    let machine = stages::collect_candidates(machine, &mut ctx).await?;
    let machine = stages::expand_graph(machine, &mut ctx).await?;
    let machine = stages::attach_chunks(machine, &mut ctx).await?;
+    let machine = stages::rerank(machine, &mut ctx).await?;
    let results = stages::assemble(machine, &mut ctx)?;

    Ok(results)
@@ -53,6 +56,7 @@ pub async fn run_pipeline_with_embedding(
    input_text: &str,
    user_id: &str,
    config: RetrievalConfig,
+    reranker: Option<RerankerLease>,
 ) -> Result<Vec<RetrievedEntity>, AppError> {
    let machine = state::ready();
    let mut ctx = stages::PipelineContext::with_embedding(
@@ -62,11 +66,13 @@ pub async fn run_pipeline_with_embedding(
        input_text.to_owned(),
        user_id.to_owned(),
        config,
+        reranker,
    );
    let machine = stages::embed(machine, &mut ctx).await?;
    let machine = stages::collect_candidates(machine, &mut ctx).await?;
    let machine = stages::expand_graph(machine, &mut ctx).await?;
    let machine = stages::attach_chunks(machine, &mut ctx).await?;
+    let machine = stages::rerank(machine, &mut ctx).await?;
    let results = stages::assemble(machine, &mut ctx)?;

    Ok(results)
@@ -7,6 +7,7 @@ use common::{
    },
    utils::embedding::generate_embedding,
 };
+use fastembed::RerankResult;
 use futures::{stream::FuturesUnordered, StreamExt};
 use state_machines::core::GuardError;
 use std::collections::{HashMap, HashSet};
@@ -15,6 +16,7 @@ use tracing::{debug, instrument, warn};
 use crate::{
    fts::find_items_by_fts,
    graph::{find_entities_by_relationship_by_id, find_entities_by_source_ids},
+    reranking::RerankerLease,
    scoring::{
        clamp_unit, fuse_scores, merge_scored_by_id, min_max_normalize, sort_by_fused_desc,
        FusionWeights, Scored,
@@ -27,6 +29,7 @@ use super::{
    config::RetrievalConfig,
    state::{
        CandidatesLoaded, ChunksAttached, Embedded, GraphExpanded, HybridRetrievalMachine, Ready,
+        Reranked,
    },
 };

@@ -41,6 +44,7 @@ pub struct PipelineContext<'a> {
    pub chunk_candidates: HashMap<String, Scored<TextChunk>>,
    pub filtered_entities: Vec<Scored<KnowledgeEntity>>,
    pub chunk_values: Vec<Scored<TextChunk>>,
+    pub reranker: Option<RerankerLease>,
 }

 impl<'a> PipelineContext<'a> {
@@ -50,6 +54,7 @@ impl<'a> PipelineContext<'a> {
        input_text: String,
        user_id: String,
        config: RetrievalConfig,
+        reranker: Option<RerankerLease>,
    ) -> Self {
        Self {
            db_client,
@@ -62,6 +67,7 @@ impl<'a> PipelineContext<'a> {
            chunk_candidates: HashMap::new(),
            filtered_entities: Vec::new(),
            chunk_values: Vec::new(),
+            reranker,
        }
    }

@@ -73,8 +79,16 @@ impl<'a> PipelineContext<'a> {
        input_text: String,
        user_id: String,
        config: RetrievalConfig,
+        reranker: Option<RerankerLease>,
    ) -> Self {
-        let mut ctx = Self::new(db_client, openai_client, input_text, user_id, config);
+        let mut ctx = Self::new(
+            db_client,
+            openai_client,
+            input_text,
+            user_id,
+            config,
+            reranker,
+        );
        ctx.query_embedding = Some(query_embedding);
        ctx
    }
@@ -327,9 +341,58 @@ pub async fn attach_chunks(
 }

 #[instrument(level = "trace", skip_all)]
-pub fn assemble(
+/// Pipeline stage that optionally reorders `ctx.filtered_entities` using the leased reranker.
+///
+/// Reranking is attempted only when a reranker lease is present, at least two entities are
+/// available, and at least two documents could be built for them. A reranker error or an
+/// empty result set is logged and leaves the existing ordering untouched, so this stage
+/// never fails because of the reranker itself.
+///
+/// # Errors
+///
+/// Returns an `AppError` only when the state machine rejects the `rerank` transition.
+pub async fn rerank(
    machine: HybridRetrievalMachine<(), ChunksAttached>,
    ctx: &mut PipelineContext<'_>,
+) -> Result<HybridRetrievalMachine<(), Reranked>, AppError> {
+    match ctx.reranker.as_ref() {
+        None => debug!("No reranker lease provided; skipping reranking stage"),
+        Some(_) if ctx.filtered_entities.len() <= 1 => {
+            debug!("Skipping reranking stage; less than two entities available");
+        }
+        Some(reranker) => {
+            let documents = build_rerank_documents(ctx, ctx.config.tuning.max_chunks_per_entity);
+            // Captured up front because `documents` is moved into the reranker call below.
+            let document_count = documents.len();
+
+            if document_count <= 1 {
+                debug!(
+                    document_count,
+                    "Skipping reranking stage; insufficient document context"
+                );
+            } else {
+                match reranker.rerank(&ctx.input_text, documents).await {
+                    Ok(results) if results.is_empty() => {
+                        debug!("Reranker returned no results; retaining original ordering");
+                    }
+                    Ok(results) => {
+                        apply_rerank_results(ctx, results);
+                        debug!("Applied reranking adjustments to candidate ordering");
+                    }
+                    Err(err) => {
+                        warn!(
+                            error = %err,
+                            "Reranking failed; continuing with original ordering"
+                        );
+                    }
+                }
+            }
+        }
+    }
+
+    // The transition happens regardless of whether reranking was applied or skipped.
+    machine
+        .rerank()
+        .map_err(|(_, guard)| map_guard_error("rerank", guard))
+}
+
+#[instrument(level = "trace", skip_all)]
+pub fn assemble(
+    machine: HybridRetrievalMachine<(), Reranked>,
+    ctx: &mut PipelineContext<'_>,
 ) -> Result<Vec<RetrievedEntity>, AppError> {
    debug!("Assembling final retrieved entities");
    let tuning = &ctx.config.tuning;
@@ -561,6 +624,113 @@ async fn enrich_chunks_from_entities(
    Ok(())
 }

+/// Renders one reranker input document per entry of `ctx.filtered_entities`, in order.
+///
+/// Every document starts with the entity's name, type and description. When chunks in
+/// `ctx.chunk_values` share the entity's `source_id`, the ones with the highest `fused`
+/// score are appended as a `Chunks:` bullet list. At most `max_chunks_per_entity` chunks
+/// (never fewer than one) are considered per entity; chunks that are blank after trimming
+/// are left out but still count toward that limit.
+fn build_rerank_documents(ctx: &PipelineContext<'_>, max_chunks_per_entity: usize) -> Vec<String> {
+    if ctx.filtered_entities.is_empty() {
+        return Vec::new();
+    }
+
+    let chunk_limit = max_chunks_per_entity.max(1);
+
+    // Group chunk references by the source they were extracted from.
+    let mut grouped: HashMap<&str, Vec<&Scored<TextChunk>>> = HashMap::new();
+    for scored_chunk in &ctx.chunk_values {
+        grouped
+            .entry(scored_chunk.item.source_id.as_str())
+            .or_default()
+            .push(scored_chunk);
+    }
+
+    let mut documents = Vec::with_capacity(ctx.filtered_entities.len());
+    for entity in &ctx.filtered_entities {
+        let mut text = format!(
+            "Name: {}\nType: {:?}\nDescription: {}\n",
+            entity.item.name, entity.item.entity_type, entity.item.description
+        );
+
+        // Work on a private copy of the references so the shared grouping stays untouched.
+        let mut ranked: Vec<&Scored<TextChunk>> = grouped
+            .get(entity.item.source_id.as_str())
+            .cloned()
+            .unwrap_or_default();
+        ranked.sort_by(|left, right| {
+            right
+                .fused
+                .partial_cmp(&left.fused)
+                .unwrap_or(std::cmp::Ordering::Equal)
+        });
+
+        // The limit is applied before blank snippets are discarded.
+        let snippets: Vec<&str> = ranked
+            .into_iter()
+            .take(chunk_limit)
+            .map(|scored_chunk| scored_chunk.item.chunk.trim())
+            .filter(|snippet| !snippet.is_empty())
+            .collect();
+
+        if !snippets.is_empty() {
+            text.push_str("Chunks:\n");
+            for snippet in snippets {
+                text.push_str("- ");
+                text.push_str(snippet);
+                text.push('\n');
+            }
+        }
+
+        documents.push(text);
+    }
+
+    documents
+}
+
+/// Reorders `ctx.filtered_entities` to follow the reranker's result order and rescores them.
+///
+/// The raw reranker scores are passed through `min_max_normalize`. Each entity referenced by
+/// `result.index` then gets a new fused score: the normalized score alone when
+/// `rerank_scores_only` is set, otherwise a blend of its previous fused score and the
+/// normalized score weighted by `rerank_blend_weight` (clamped via `clamp_unit`).
+///
+/// Entities the reranker did not mention keep their score and are appended after the reranked
+/// ones in their original relative order. Finally the list is cut to `rerank_keep_top`
+/// entries; a value of `0` disables the cut. Nothing happens when either `results` or the
+/// entity list is empty.
+fn apply_rerank_results(ctx: &mut PipelineContext<'_>, results: Vec<RerankResult>) {
+    if results.is_empty() || ctx.filtered_entities.is_empty() {
+        return;
+    }
+
+    let keep_top = ctx.config.tuning.rerank_keep_top;
+    // `None` means "use the reranker score as-is"; `Some(w)` blends it with the old score.
+    let blend_weight = if ctx.config.tuning.rerank_scores_only {
+        None
+    } else {
+        Some(clamp_unit(ctx.config.tuning.rerank_blend_weight))
+    };
+
+    let raw_scores: Vec<f32> = results.iter().map(|result| result.score).collect();
+    let normalized_scores = min_max_normalize(&raw_scores);
+
+    // Each slot is emptied once its entity has been placed, so an index can be used only once.
+    let mut slots: Vec<Option<Scored<KnowledgeEntity>>> =
+        std::mem::take(&mut ctx.filtered_entities)
+            .into_iter()
+            .map(Some)
+            .collect();
+    let total = slots.len();
+    let mut reranked: Vec<Scored<KnowledgeEntity>> = Vec::with_capacity(total);
+
+    for (result, normalized) in results.into_iter().zip(normalized_scores) {
+        match slots.get_mut(result.index).map(Option::take) {
+            Some(Some(mut candidate)) => {
+                let rescored = match blend_weight {
+                    Some(weight) => {
+                        clamp_unit(candidate.fused * (1.0 - weight) + normalized * weight)
+                    }
+                    None => clamp_unit(normalized),
+                };
+                candidate.update_fused(rescored);
+                reranked.push(candidate);
+            }
+            Some(None) => {
+                warn!(
+                    result_index = result.index,
+                    "Reranker returned duplicate index; skipping"
+                );
+            }
+            None => {
+                warn!(
+                    result_index = result.index,
+                    "Reranker returned out-of-range index; skipping"
+                );
+            }
+        }
+        if reranked.len() == total {
+            break;
+        }
+    }
+
+    // Whatever the reranker did not place keeps its original relative order at the tail.
+    reranked.extend(slots.into_iter().flatten());
+
+    if keep_top > 0 {
+        // `truncate` is a no-op when the list is already short enough.
+        reranked.truncate(keep_top);
+    }
+    ctx.filtered_entities = reranked;
+}
+
 fn estimate_tokens(text: &str, avg_chars_per_token: usize) -> usize {
    let chars = text.chars().count().max(1);
    (chars / avg_chars_per_token).max(1)
@@ -4,18 +4,20 @@ state_machine! {
    name: HybridRetrievalMachine,
    state: HybridRetrievalState,
    initial: Ready,
-    states: [Ready, Embedded, CandidatesLoaded, GraphExpanded, ChunksAttached, Completed, Failed],
+    states: [Ready, Embedded, CandidatesLoaded, GraphExpanded, ChunksAttached, Reranked, Completed, Failed],
    events {
        embed { transition: { from: Ready, to: Embedded } }
        collect_candidates { transition: { from: Embedded, to: CandidatesLoaded } }
        expand_graph { transition: { from: CandidatesLoaded, to: GraphExpanded } }
        attach_chunks { transition: { from: GraphExpanded, to: ChunksAttached } }
-        assemble { transition: { from: ChunksAttached, to: Completed } }
+        rerank { transition: { from: ChunksAttached, to: Reranked } }
+        assemble { transition: { from: Reranked, to: Completed } }
        abort {
            transition: { from: Ready, to: Failed }
            transition: { from: CandidatesLoaded, to: Failed }
            transition: { from: GraphExpanded, to: Failed }
            transition: { from: ChunksAttached, to: Failed }
+            transition: { from: Reranked, to: Failed }
        }
    }
 }