chore: refactor retrieval pipeline to chunk-first RRF with derived entities and slimmer eval surface.

Collapse the multi-strategy entity engine into one benchmarked chunk retrieval path, derive entities from retrieved chunks, update consumers and docs accordingly, and apply clippy fixes across the workspace.
This commit is contained in:
Per Stark
2026-05-30 22:19:08 +02:00
parent c70141de35
commit 5c2d2e24d3
38 changed files with 1049 additions and 2614 deletions
-91
View File
@@ -44,7 +44,6 @@
--leading-snug: 1.375;
--leading-relaxed: 1.625;
--ease-out: cubic-bezier(0, 0, 0.2, 1);
--ease-in-out: cubic-bezier(0.4, 0, 0.2, 1);
--animate-pulse: pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite;
--default-transition-duration: 150ms;
--default-transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
@@ -285,37 +284,6 @@
}
}
}
.drawer-open {
> .drawer-side {
overflow-y: auto;
}
> .drawer-toggle {
display: none;
& ~ .drawer-side {
pointer-events: auto;
visibility: visible;
position: sticky;
display: block;
width: auto;
overscroll-behavior: auto;
opacity: 100%;
& > .drawer-overlay {
cursor: default;
background-color: transparent;
}
& > *:not(.drawer-overlay) {
translate: 0%;
[dir="rtl"] & {
translate: 0%;
}
}
}
&:checked ~ .drawer-side {
pointer-events: auto;
visibility: visible;
}
}
}
.drawer-toggle {
position: fixed;
height: calc(0.25rem * 0);
@@ -1074,22 +1042,6 @@
grid-row-start: 1;
min-width: calc(0.25rem * 0);
}
.chat-image {
grid-row: span 2 / span 2;
align-self: flex-end;
}
.chat-footer {
grid-row-start: 3;
display: flex;
gap: calc(0.25rem * 1);
font-size: 0.6875rem;
}
.chat-header {
grid-row-start: 1;
display: flex;
gap: calc(0.25rem * 1);
font-size: 0.6875rem;
}
.container {
width: 100%;
@media (width >= 40rem) {
@@ -1796,9 +1748,6 @@
.w-10 {
width: calc(var(--spacing) * 10);
}
.w-11 {
width: calc(var(--spacing) * 11);
}
.w-11\/12 {
width: calc(11/12 * 100%);
}
@@ -1862,9 +1811,6 @@
.flex-none {
flex: none;
}
.flex-shrink {
flex-shrink: 1;
}
.flex-shrink-0 {
flex-shrink: 0;
}
@@ -1877,13 +1823,6 @@
.grow {
flex-grow: 1;
}
.border-collapse {
border-collapse: collapse;
}
.-translate-y-1 {
--tw-translate-y: calc(var(--spacing) * -1);
translate: var(--tw-translate-x) var(--tw-translate-y);
}
.-translate-y-1\/2 {
--tw-translate-y: calc(calc(1/2 * 100%) * -1);
translate: var(--tw-translate-x) var(--tw-translate-y);
@@ -1956,9 +1895,6 @@
.justify-start {
justify-content: flex-start;
}
.gap-0 {
gap: calc(var(--spacing) * 0);
}
.gap-0\.5 {
gap: calc(var(--spacing) * 0.5);
}
@@ -2115,9 +2051,6 @@
.bg-transparent {
background-color: transparent;
}
.bg-warning {
background-color: var(--color-warning);
}
.bg-warning\/10 {
background-color: var(--color-warning);
@supports (color: color-mix(in lab, red, red)) {
@@ -2136,9 +2069,6 @@
.loading-spinner {
mask-image: url("data:image/svg+xml,%3Csvg width='24' height='24' stroke='black' viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg'%3E%3Cg transform-origin='center'%3E%3Ccircle cx='12' cy='12' r='9.5' fill='none' stroke-width='3' stroke-linecap='round'%3E%3CanimateTransform attributeName='transform' type='rotate' from='0 12 12' to='360 12 12' dur='2s' repeatCount='indefinite'/%3E%3Canimate attributeName='stroke-dasharray' values='0,150;42,150;42,150' keyTimes='0;0.475;1' dur='1.5s' repeatCount='indefinite'/%3E%3Canimate attributeName='stroke-dashoffset' values='0;-16;-59' keyTimes='0;0.475;1' dur='1.5s' repeatCount='indefinite'/%3E%3C/circle%3E%3C/g%3E%3C/svg%3E");
}
.mask-repeat {
mask-repeat: repeat;
}
.fill-current {
fill: currentcolor;
}
@@ -2169,9 +2099,6 @@
.p-8 {
padding: calc(var(--spacing) * 8);
}
.px-1 {
padding-inline: calc(var(--spacing) * 1);
}
.px-1\.5 {
padding-inline: calc(var(--spacing) * 1.5);
}
@@ -2326,9 +2253,6 @@
--tw-tracking: var(--tracking-widest);
letter-spacing: var(--tracking-widest);
}
.text-wrap {
text-wrap: wrap;
}
.break-words {
overflow-wrap: break-word;
}
@@ -2395,17 +2319,6 @@
.italic {
font-style: italic;
}
.underline {
text-decoration-line: underline;
}
.swap-active {
.swap-off {
opacity: 0%;
}
.swap-on {
opacity: 100%;
}
}
.opacity-0 {
opacity: 0%;
}
@@ -2496,10 +2409,6 @@
--tw-duration: 300ms;
transition-duration: 300ms;
}
.ease-in-out {
--tw-ease: var(--ease-in-out);
transition-timing-function: var(--ease-in-out);
}
.ease-out {
--tw-ease: var(--ease-out);
transition-timing-function: var(--ease-out);
+1 -8
View File
@@ -2,10 +2,7 @@ use common::storage::types::conversation::SidebarConversation;
use common::storage::{db::SurrealDbClient, store::StorageManager};
use common::utils::embedding::EmbeddingProvider;
use common::utils::template_engine::{ProvidesTemplateEngine, TemplateEngine};
use common::{
create_template_engine, storage::db::ProvidesDb,
utils::config::{AppConfig, RetrievalStrategy},
};
use common::{create_template_engine, storage::db::ProvidesDb, utils::config::AppConfig};
use retrieval_pipeline::reranking::RerankerPool;
use std::collections::HashMap;
use std::sync::{
@@ -75,10 +72,6 @@ impl HtmlState {
}
}
pub fn retrieval_strategy(&self) -> RetrievalStrategy {
self.config.resolved_retrieval_strategy()
}
pub async fn get_cached_conversation_archive(
&self,
user_id: &str,
@@ -16,12 +16,9 @@ use futures::{
};
use json_stream_parser::JsonStreamParser;
use minijinja::Value;
use retrieval_pipeline::{
answer_retrieval::{
chunks_to_chat_context, create_chat_request, create_user_message_with_history,
LLMResponseFormat,
},
retrieved_entities_to_json,
use retrieval_pipeline::answer_retrieval::{
chunks_to_chat_context, create_chat_request, create_user_message_with_history,
LLMResponseFormat,
};
use serde::{Deserialize, Serialize};
use serde_json::from_str;
@@ -189,11 +186,7 @@ struct ReferenceData {
}
fn extract_reference_strings(response: &LLMResponseFormat) -> Vec<String> {
response
.references
.iter()
.map(|reference| reference.reference.clone())
.collect()
response.reference_ids()
}
#[allow(clippy::too_many_lines)]
@@ -362,10 +355,9 @@ async fn prepare_chat_request(
None => None,
};
let strategy = state.retrieval_strategy();
let config = retrieval_pipeline::RetrievalConfig::for_chat(strategy);
let config = retrieval_pipeline::RetrievalConfig::default();
let retrieval_result = match retrieval_pipeline::retrieve_entities(
let retrieval_result = match retrieval_pipeline::retrieve(
&state.db,
&state.openai_client,
Some(&*state.embedding_provider),
@@ -387,12 +379,9 @@ async fn prepare_chat_request(
let allowed_reference_ids = collect_reference_ids_from_retrieval(&retrieval_result);
let context_json = match retrieval_result {
retrieval_pipeline::StrategyOutput::Chunks(chunks) => chunks_to_chat_context(&chunks),
retrieval_pipeline::StrategyOutput::Entities(entities) => {
retrieved_entities_to_json(&entities)
}
retrieval_pipeline::StrategyOutput::Search(search_result) => {
chunks_to_chat_context(&search_result.chunks)
retrieval_pipeline::RetrievalOutput::Chunks(chunks) => chunks_to_chat_context(&chunks),
retrieval_pipeline::RetrievalOutput::WithEntities { chunks, .. } => {
chunks_to_chat_context(&chunks)
}
};
let formatted_user_message =
@@ -9,7 +9,7 @@ use common::{
types::{knowledge_entity::KnowledgeEntity, text_chunk::TextChunk, StoredObject},
},
};
use retrieval_pipeline::StrategyOutput;
use retrieval_pipeline::RetrievalOutput;
use uuid::Uuid;
pub(crate) const MAX_REFERENCE_COUNT: usize = 10;
@@ -86,40 +86,29 @@ pub(crate) enum ReferenceLookupTarget {
}
pub(crate) fn collect_reference_ids_from_retrieval(
retrieval_result: &StrategyOutput,
retrieval_result: &RetrievalOutput,
) -> Vec<String> {
let mut ids = Vec::new();
let mut seen = HashSet::new();
let mut push_id = |id: String| {
if seen.insert(id.clone()) {
ids.push(id);
}
};
match retrieval_result {
StrategyOutput::Chunks(chunks) => {
RetrievalOutput::Chunks(chunks) => {
for chunk in chunks {
let id = chunk.chunk.id.clone();
if seen.insert(id.clone()) {
ids.push(id);
}
push_id(chunk.chunk.id.clone());
}
}
StrategyOutput::Entities(entities) => {
RetrievalOutput::WithEntities { chunks, entities } => {
for chunk in chunks {
push_id(chunk.chunk.id.clone());
}
for entity in entities {
let id = entity.entity.id.clone();
if seen.insert(id.clone()) {
ids.push(id);
}
}
}
StrategyOutput::Search(search) => {
for chunk in &search.chunks {
let id = chunk.chunk.id.clone();
if seen.insert(id.clone()) {
ids.push(id);
}
}
for entity in &search.entities {
let id = entity.entity.id.clone();
if seen.insert(id.clone()) {
ids.push(id);
}
push_id(entity.entity.id.clone());
}
}
}
+1 -1
View File
@@ -13,7 +13,7 @@ use crate::{
middlewares::{
auth_middleware::RequireUser,
response_middleware::{
template_as_response, HtmlError, TemplateResponse, TemplateResult, ResponseResult,
template_as_response, TemplateResponse, TemplateResult, ResponseResult,
},
},
utils::text_content_preview::truncate_text_contents,
+5 -5
View File
@@ -32,7 +32,7 @@ use crate::{
middlewares::{
auth_middleware::RequireUser,
response_middleware::{
template_with_headers, HtmlError, TemplateResponse, TemplateResult, ResponseResult,
template_with_headers, TemplateResponse, TemplateResult, ResponseResult,
},
},
utils::pagination::{paginate_items, Pagination},
@@ -284,9 +284,9 @@ pub async fn suggest_knowledge_relationships(
None => None,
};
let config = retrieval_pipeline::RetrievalConfig::for_relationship_suggestion();
if let Ok(retrieval_pipeline::StrategyOutput::Entities(results)) =
retrieval_pipeline::retrieve_entities(
let config = retrieval_pipeline::RetrievalConfig::with_entities();
if let Ok(retrieval_pipeline::RetrievalOutput::WithEntities { entities, .. }) =
retrieval_pipeline::retrieve(
&state.db,
&state.openai_client,
Some(&*state.embedding_provider),
@@ -297,7 +297,7 @@ pub async fn suggest_knowledge_relationships(
)
.await
{
for retrieval_pipeline::RetrievedEntity { entity, score, .. } in results {
for retrieval_pipeline::RetrievedEntity { entity, score, .. } in entities {
if suggestion_scores.len() >= MAX_RELATIONSHIP_SUGGESTIONS {
break;
}
@@ -12,7 +12,7 @@ use crate::html_state::HtmlState;
use crate::middlewares::{
auth_middleware::RequireUser,
response_middleware::{
template_with_headers, HtmlError, TemplateResponse, TemplateResult, ResponseResult,
template_with_headers, TemplateResponse, TemplateResult, ResponseResult,
},
};
use common::storage::types::{
+22 -21
View File
@@ -4,7 +4,7 @@ use axum::{
extract::{Query, State},
};
use common::storage::types::{text_content::TextContent, user::User};
use retrieval_pipeline::{RetrievalConfig, SearchResult, SearchTarget, StrategyOutput};
use retrieval_pipeline::{retrieve, RetrievalConfig, RetrievalOutput, RetrievedChunk, RetrievedEntity};
use serde::{de, Deserialize, Deserializer, Serialize};
use std::{fmt, str::FromStr};
@@ -108,35 +108,35 @@ async fn perform_search(
return Ok((Vec::new(), String::new()));
}
let config = RetrievalConfig::for_search(SearchTarget::Both);
let config = RetrievalConfig::with_entities();
let reranker_lease = match &state.reranker_pool {
Some(pool) => pool.checkout().await,
None => None,
};
let params = retrieval_pipeline::pipeline::StrategyParams {
db_client: &state.db,
openai_client: &state.openai_client,
embedding_provider: Some(&state.embedding_provider),
input_text: trimmed_query,
user_id: &user.id,
let result = retrieve(
&state.db,
&state.openai_client,
Some(&state.embedding_provider),
trimmed_query,
&user.id,
config,
reranker: reranker_lease,
};
let result = retrieval_pipeline::pipeline::execute(params).await?;
reranker_lease,
)
.await?;
let search_result = match result {
StrategyOutput::Search(sr) => sr,
_ => SearchResult::new(vec![], vec![]),
let (chunks, entities) = match result {
RetrievalOutput::WithEntities { chunks, entities } => (chunks, entities),
RetrievalOutput::Chunks(chunks) => (chunks, Vec::new()),
};
let source_label_map = collect_source_label_map(state, user, &search_result).await?;
let source_label_map = collect_source_label_map(state, user, &chunks, &entities).await?;
let mut combined_results: Vec<SearchResultForTemplate> =
Vec::with_capacity(search_result.chunks.len().saturating_add(search_result.entities.len()));
Vec::with_capacity(chunks.len().saturating_add(entities.len()));
for chunk_result in search_result.chunks {
for chunk_result in chunks {
let source_label = source_label_map
.get(&chunk_result.chunk.source_id)
.cloned()
@@ -155,7 +155,7 @@ async fn perform_search(
});
}
for entity_result in search_result.entities {
for entity_result in entities {
let source_label = source_label_map
.get(&entity_result.entity.source_id)
.cloned()
@@ -187,13 +187,14 @@ async fn perform_search(
async fn collect_source_label_map(
state: &HtmlState,
user: &User,
search_result: &SearchResult,
chunks: &[RetrievedChunk],
entities: &[RetrievedEntity],
) -> Result<std::collections::HashMap<String, String>, HtmlError> {
let mut source_ids = HashSet::new();
for chunk_result in &search_result.chunks {
for chunk_result in chunks {
source_ids.insert(chunk_result.chunk.source_id.clone());
}
for entity_result in &search_result.entities {
for entity_result in entities {
source_ids.insert(entity_result.entity.source_id.clone());
}