wip query

This commit is contained in:
Per Stark
2024-11-25 20:39:53 +01:00
parent 764cd52c12
commit 70beb4c753
7 changed files with 179 additions and 102 deletions

View File

@@ -1,5 +1,5 @@
use surrealdb::{engine::remote::ws::Client, Surreal};
use tracing::info;
use tracing::debug;
use crate::{error::ProcessingError, storage::types::knowledge_entity::KnowledgeEntity};
@@ -71,23 +71,21 @@ pub async fn find_entities_by_relationship_by_source_ids(
db_client: &Surreal<Client>,
source_ids: &[String],
) -> Result<Vec<KnowledgeEntity>, ProcessingError> {
// Create a comma-separated list of IDs wrapped in backticks
let ids = source_ids
.iter()
// .map(|id| format!("`{}`", id))
.map(|id| format!("knowledge_entity:`{}`", id))
.collect::<Vec<_>>()
.join(", ");
info!("{:?}", ids);
// let first = format!("knowledge_entity:`{}`", source_ids.first().unwrap());
debug!("{:?}", ids);
let query = format!(
"SELECT *, array::complement(<->relates_to<->knowledge_entity, [id]) AS related FROM [{}] FETCH related",
"SELECT *, <-> relates_to <-> knowledge_entity AS related FROM [{}]",
ids
);
info!("{}", query);
debug!("{}", query);
let result: Vec<KnowledgeEntity> = db_client.query(query).await?.take(0)?;
@@ -95,14 +93,14 @@ pub async fn find_entities_by_relationship_by_source_ids(
}
pub async fn find_entities_by_relationship_by_id(
db_client: &Surreal<Client>,
source_id: &str,
source_id: String,
) -> Result<Vec<KnowledgeEntity>, ProcessingError> {
let query = format!(
"SELECT *, <-> relates_to <-> knowledge_entity AS related FROM knowledge_entity:`{}`",
source_id
);
info!("{}", query);
debug!("{}", query);
let result: Vec<KnowledgeEntity> = db_client.query(query).await?.take(0)?;

View File

@@ -1,2 +1,94 @@
pub mod graph;
pub mod vector;
use crate::{
error::ProcessingError,
retrieval::{
graph::{find_entities_by_relationship_by_id, find_entities_by_source_ids},
vector::find_items_by_vector_similarity,
},
storage::types::{knowledge_entity::KnowledgeEntity, text_chunk::TextChunk},
};
use futures::future::{try_join, try_join_all};
use std::collections::HashMap;
use surrealdb::{engine::remote::ws::Client, Surreal};
use tracing::info;
/// Retrieves knowledge entities relevant to `query` by combining several lookups
/// and merging their results into one deduplicated list.
///
/// # Strategy
/// 1. Vector similarity search against the `knowledge_entity` table.
/// 2. Vector similarity search against the `text_chunk` table, followed by a lookup
///    of the knowledge entities that share a `source_id` with the matching chunks.
/// 3. For every entity found in step 2, a graph relationship lookup via
///    `find_entities_by_relationship_by_id`.
///
/// The two similarity searches of steps 1 and 2 run concurrently, as do the
/// per-entity relationship lookups of step 3.
///
/// # Arguments
/// * `db_client` - SurrealDB client used for every database lookup
/// * `openai_client` - OpenAI client handed to the vector similarity search
/// * `query` - The search text
///
/// # Returns
/// The merged entities, deduplicated by `id`. Entities appear in the order in which
/// their id was first encountered (step 1 results, then step 2, then step 3), so the
/// output order is deterministic for a given set of lookup results. When the same id
/// is produced more than once, the value from the latest occurrence is kept.
///
/// # Errors
/// Returns the first `ProcessingError` produced by any of the underlying lookups.
pub async fn combined_knowledge_entity_retrieval(
    db_client: &Surreal<Client>,
    openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
    query: String,
) -> Result<Vec<KnowledgeEntity>, ProcessingError> {
    info!("Received input: {:?}", query);

    // The two similarity searches are independent of each other, so drive them together.
    // NOTE(review): the leading 10 / 5 are presumably result limits - confirm against
    // `find_items_by_vector_similarity`.
    let (items_from_knowledge_entity_similarity, closest_chunks) = try_join(
        find_items_by_vector_similarity(
            10,
            query.clone(),
            db_client,
            "knowledge_entity".to_string(),
            openai_client,
        ),
        find_items_by_vector_similarity(
            5,
            query,
            db_client,
            "text_chunk".to_string(),
            openai_client,
        ),
    )
    .await?;

    let source_ids = closest_chunks
        .iter()
        .map(|chunk: &TextChunk| chunk.source_id.clone())
        .collect::<Vec<String>>();

    let items_from_text_chunk_similarity: Vec<KnowledgeEntity> =
        find_entities_by_source_ids(source_ids, "knowledge_entity".to_string(), db_client).await?;

    // Only the id of each entity is needed to start a relationship lookup, so borrow
    // the entities instead of cloning the whole vector.
    let items_from_relationships = try_join_all(
        items_from_text_chunk_similarity
            .iter()
            .map(|entity| find_entities_by_relationship_by_id(db_client, entity.id.clone())),
    )
    .await?
    .into_iter()
    .flatten()
    .collect::<Vec<KnowledgeEntity>>();

    let candidates = items_from_knowledge_entity_similarity
        .into_iter()
        .chain(items_from_text_chunk_similarity)
        .chain(items_from_relationships);

    // Deduplicate by id while keeping first-seen order. Draining a HashMap directly
    // would hand the entities back in arbitrary order.
    let mut entities: Vec<KnowledgeEntity> = Vec::new();
    let mut position_by_id: HashMap<_, usize> = HashMap::new();
    for entity in candidates {
        match position_by_id.entry(entity.id.clone()) {
            std::collections::hash_map::Entry::Occupied(slot) => {
                // Every stored position was taken from `entities.len()` right before a
                // push, so it is always in bounds. A later duplicate replaces the value.
                let position = *slot.get();
                entities[position] = entity;
            }
            std::collections::hash_map::Entry::Vacant(slot) => {
                slot.insert(entities.len());
                entities.push(entity);
            }
        }
    }

    Ok(entities)
}