benchmarks: ready for hybrid revised

2026-06-30 10:01:40 +02:00 · 2025-12-03 11:38:07 +01:00
parent 5e5053039a
commit c60efb2af7
22 changed files with 760 additions and 476 deletions
@@ -63,6 +63,7 @@ pub struct RetrievalTuning {
    pub rerank_blend_weight: f32,
    pub rerank_scores_only: bool,
    pub rerank_keep_top: usize,
+    pub chunk_result_cap: usize,
 }

 impl Default for RetrievalTuning {
@@ -86,6 +87,7 @@ impl Default for RetrievalTuning {
            rerank_blend_weight: 0.65,
            rerank_scores_only: false,
            rerank_keep_top: 8,
+            chunk_result_cap: 5,
        }
    }
 }
@@ -675,7 +675,13 @@ pub fn assemble_chunks(ctx: &mut PipelineContext<'_>) -> Result<(), AppError> {
        ctx.config.tuning.lexical_match_weight,
    );

-    let limit = ctx.config.tuning.chunk_vector_take.max(1);
+    // Cap results at `chunk_result_cap` (min 1, never above `chunk_vector_take`) to bound context size.
+    let limit = ctx
+        .config
+        .tuning
+        .chunk_result_cap
+        .max(1)
+        .min(ctx.config.tuning.chunk_vector_take.max(1));
    if chunk_values.len() > limit {
        chunk_values.truncate(limit);
    }
@@ -29,7 +29,10 @@ impl RerankerPool {
    /// Build the pool at startup.
    /// `pool_size` controls max parallel reranks.
    pub fn new(pool_size: usize) -> Result<Arc<Self>, AppError> {
-        Self::new_with_options(pool_size, RerankInitOptions::default())
+        Self::new_with_options(
+            pool_size,
+            RerankInitOptions::new(fastembed::RerankerModel::JINARerankerV1TurboEn),
+        )
    }

    fn new_with_options(