mirror of
https://github.com/perstarkse/minne.git
synced 2026-07-03 11:31:43 +02:00
chore: refactor retrieval pipeline to chunk-first RRF with derived entities and slimmer eval surface.
Collapse the multi-strategy entity engine into one benchmarked chunk retrieval path, derive entities from retrieved chunks, and update consumers, docs, and clippy fixes across the workspace.
This commit is contained in:
@@ -16,12 +16,9 @@ use futures::{
|
||||
};
|
||||
use json_stream_parser::JsonStreamParser;
|
||||
use minijinja::Value;
|
||||
use retrieval_pipeline::{
|
||||
answer_retrieval::{
|
||||
chunks_to_chat_context, create_chat_request, create_user_message_with_history,
|
||||
LLMResponseFormat,
|
||||
},
|
||||
retrieved_entities_to_json,
|
||||
use retrieval_pipeline::answer_retrieval::{
|
||||
chunks_to_chat_context, create_chat_request, create_user_message_with_history,
|
||||
LLMResponseFormat,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::from_str;
|
||||
@@ -189,11 +186,7 @@ struct ReferenceData {
|
||||
}
|
||||
|
||||
fn extract_reference_strings(response: &LLMResponseFormat) -> Vec<String> {
|
||||
response
|
||||
.references
|
||||
.iter()
|
||||
.map(|reference| reference.reference.clone())
|
||||
.collect()
|
||||
response.reference_ids()
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_lines)]
|
||||
@@ -362,10 +355,9 @@ async fn prepare_chat_request(
|
||||
None => None,
|
||||
};
|
||||
|
||||
let strategy = state.retrieval_strategy();
|
||||
let config = retrieval_pipeline::RetrievalConfig::for_chat(strategy);
|
||||
let config = retrieval_pipeline::RetrievalConfig::default();
|
||||
|
||||
let retrieval_result = match retrieval_pipeline::retrieve_entities(
|
||||
let retrieval_result = match retrieval_pipeline::retrieve(
|
||||
&state.db,
|
||||
&state.openai_client,
|
||||
Some(&*state.embedding_provider),
|
||||
@@ -387,12 +379,9 @@ async fn prepare_chat_request(
|
||||
let allowed_reference_ids = collect_reference_ids_from_retrieval(&retrieval_result);
|
||||
|
||||
let context_json = match retrieval_result {
|
||||
retrieval_pipeline::StrategyOutput::Chunks(chunks) => chunks_to_chat_context(&chunks),
|
||||
retrieval_pipeline::StrategyOutput::Entities(entities) => {
|
||||
retrieved_entities_to_json(&entities)
|
||||
}
|
||||
retrieval_pipeline::StrategyOutput::Search(search_result) => {
|
||||
chunks_to_chat_context(&search_result.chunks)
|
||||
retrieval_pipeline::RetrievalOutput::Chunks(chunks) => chunks_to_chat_context(&chunks),
|
||||
retrieval_pipeline::RetrievalOutput::WithEntities { chunks, .. } => {
|
||||
chunks_to_chat_context(&chunks)
|
||||
}
|
||||
};
|
||||
let formatted_user_message =
|
||||
|
||||
@@ -9,7 +9,7 @@ use common::{
|
||||
types::{knowledge_entity::KnowledgeEntity, text_chunk::TextChunk, StoredObject},
|
||||
},
|
||||
};
|
||||
use retrieval_pipeline::StrategyOutput;
|
||||
use retrieval_pipeline::RetrievalOutput;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub(crate) const MAX_REFERENCE_COUNT: usize = 10;
|
||||
@@ -86,40 +86,29 @@ pub(crate) enum ReferenceLookupTarget {
|
||||
}
|
||||
|
||||
pub(crate) fn collect_reference_ids_from_retrieval(
|
||||
retrieval_result: &StrategyOutput,
|
||||
retrieval_result: &RetrievalOutput,
|
||||
) -> Vec<String> {
|
||||
let mut ids = Vec::new();
|
||||
let mut seen = HashSet::new();
|
||||
|
||||
let mut push_id = |id: String| {
|
||||
if seen.insert(id.clone()) {
|
||||
ids.push(id);
|
||||
}
|
||||
};
|
||||
|
||||
match retrieval_result {
|
||||
StrategyOutput::Chunks(chunks) => {
|
||||
RetrievalOutput::Chunks(chunks) => {
|
||||
for chunk in chunks {
|
||||
let id = chunk.chunk.id.clone();
|
||||
if seen.insert(id.clone()) {
|
||||
ids.push(id);
|
||||
}
|
||||
push_id(chunk.chunk.id.clone());
|
||||
}
|
||||
}
|
||||
StrategyOutput::Entities(entities) => {
|
||||
RetrievalOutput::WithEntities { chunks, entities } => {
|
||||
for chunk in chunks {
|
||||
push_id(chunk.chunk.id.clone());
|
||||
}
|
||||
for entity in entities {
|
||||
let id = entity.entity.id.clone();
|
||||
if seen.insert(id.clone()) {
|
||||
ids.push(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
StrategyOutput::Search(search) => {
|
||||
for chunk in &search.chunks {
|
||||
let id = chunk.chunk.id.clone();
|
||||
if seen.insert(id.clone()) {
|
||||
ids.push(id);
|
||||
}
|
||||
}
|
||||
for entity in &search.entities {
|
||||
let id = entity.entity.id.clone();
|
||||
if seen.insert(id.clone()) {
|
||||
ids.push(id);
|
||||
}
|
||||
push_id(entity.entity.id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user