passed wide smoke check

This commit is contained in:
Per Stark
2025-12-10 13:54:08 +01:00
parent 2e2ea0c4ff
commit a5bc72aedf
12 changed files with 403 additions and 235 deletions

View File

@@ -151,6 +151,10 @@ pub async fn get_response_stream(
retrieval_pipeline::StrategyOutput::Entities(entities) => {
retrieved_entities_to_json(&entities)
}
retrieval_pipeline::StrategyOutput::Search(search_result) => {
// For chat, use chunks from the search result
chunks_to_chat_context(&search_result.chunks)
}
};
let formatted_user_message =
create_user_message_with_history(&context_json, &history, &user_message.content);

View File

@@ -1,17 +1,13 @@
use std::{fmt, str::FromStr};
use std::{fmt, str::FromStr, time::Duration};
use axum::{
extract::{Query, State},
response::IntoResponse,
};
use common::storage::types::{
conversation::Conversation,
knowledge_entity::{KnowledgeEntity, KnowledgeEntitySearchResult},
text_content::{TextContent, TextContentSearchResult},
user::User,
};
use futures::future::try_join;
use common::storage::types::{conversation::Conversation, user::User};
use retrieval_pipeline::{RetrievalConfig, SearchResult, SearchTarget, StrategyOutput};
use serde::{de, Deserialize, Deserializer, Serialize};
use tokio::time::error::Elapsed;
use crate::{
html_state::HtmlState,
@@ -20,6 +16,7 @@ use crate::{
response_middleware::{HtmlError, TemplateResponse},
},
};
/// Serde deserialization decorator to map empty Strings to None,
fn empty_string_as_none<'de, D, T>(de: D) -> Result<Option<T>, D::Error>
where
@@ -40,6 +37,26 @@ pub struct SearchParams {
query: Option<String>,
}
/// Chunk result for template rendering
#[derive(Serialize)]
struct TextChunkForTemplate {
id: String,
source_id: String,
chunk: String,
score: f32,
}
/// Entity result for template rendering (from pipeline)
#[derive(Serialize)]
struct KnowledgeEntityForTemplate {
id: String,
name: String,
description: String,
entity_type: String,
source_id: String,
score: f32,
}
pub async fn search_result_handler(
State(state): State<HtmlState>,
Query(params): Query<SearchParams>,
@@ -50,9 +67,9 @@ pub async fn search_result_handler(
result_type: String,
score: f32,
#[serde(skip_serializing_if = "Option::is_none")]
text_content: Option<TextContentSearchResult>,
text_chunk: Option<TextChunkForTemplate>,
#[serde(skip_serializing_if = "Option::is_none")]
knowledge_entity: Option<KnowledgeEntitySearchResult>,
knowledge_entity: Option<KnowledgeEntityForTemplate>,
}
#[derive(Serialize)]
@@ -70,37 +87,64 @@ pub async fn search_result_handler(
if trimmed_query.is_empty() {
(Vec::<SearchResultForTemplate>::new(), String::new())
} else {
const TOTAL_LIMIT: usize = 10;
let (text_results, entity_results) = try_join(
TextContent::search(&state.db, trimmed_query, &user.id, TOTAL_LIMIT),
KnowledgeEntity::search(&state.db, trimmed_query, &user.id, TOTAL_LIMIT),
// Use retrieval pipeline Search strategy
let config = RetrievalConfig::for_search(SearchTarget::Both);
let result = retrieval_pipeline::pipeline::run_pipeline(
&state.db,
&state.openai_client,
None, // No embedding provider in HtmlState
trimmed_query,
&user.id,
config,
None, // No reranker for now
)
.await?;
let mut combined_results: Vec<SearchResultForTemplate> =
Vec::with_capacity(text_results.len() + entity_results.len());
let search_result = match result {
StrategyOutput::Search(sr) => sr,
_ => SearchResult::new(vec![], vec![]),
};
for text_result in text_results {
let score = text_result.score;
let mut combined_results: Vec<SearchResultForTemplate> =
Vec::with_capacity(search_result.chunks.len() + search_result.entities.len());
// Add chunk results
for chunk_result in search_result.chunks {
combined_results.push(SearchResultForTemplate {
result_type: "text_content".to_string(),
score,
text_content: Some(text_result),
result_type: "text_chunk".to_string(),
score: chunk_result.score,
text_chunk: Some(TextChunkForTemplate {
id: chunk_result.chunk.id,
source_id: chunk_result.chunk.source_id,
chunk: chunk_result.chunk.chunk,
score: chunk_result.score,
}),
knowledge_entity: None,
});
}
for entity_result in entity_results {
let score = entity_result.score;
// Add entity results
for entity_result in search_result.entities {
combined_results.push(SearchResultForTemplate {
result_type: "knowledge_entity".to_string(),
score,
text_content: None,
knowledge_entity: Some(entity_result),
score: entity_result.score,
text_chunk: None,
knowledge_entity: Some(KnowledgeEntityForTemplate {
id: entity_result.entity.id,
name: entity_result.entity.name,
description: entity_result.entity.description,
entity_type: format!("{:?}", entity_result.entity.entity_type),
source_id: entity_result.entity.source_id,
score: entity_result.score,
}),
});
}
// Sort by score descending
combined_results.sort_by(|a, b| b.score.total_cmp(&a.score));
// Limit results
const TOTAL_LIMIT: usize = 10;
combined_results.truncate(TOTAL_LIMIT);
(combined_results, trimmed_query.to_string())