diff --git a/src/models/text_content.rs b/src/models/text_content.rs index 21a74f3..4c2b530 100644 --- a/src/models/text_content.rs +++ b/src/models/text_content.rs @@ -44,20 +44,7 @@ impl TextContent { } } - // panic!("STOPPING"); - // let deleted: Vec = db_client.delete("text_chunk").await?; - // info! {"{:?} KnowledgeEntities deleted", deleted.len()}; - - // let relationships_deleted: Vec = - // db_client.delete("knowledge_relationship").await?; - // info!("{:?} Relationships deleted", relationships_deleted.len()); - - // panic!("STOP"); - - // db_client.query("REMOVE INDEX embeddings ON knowledge_entity").await?; - // db_client - // .query("DEFINE INDEX idx_embedding ON text_chunk FIELDS embedding HNSW DIMENSION 1536") - // .await?; + // Rebuild indexes db_client.rebuild_indexes().await?; // Step 1: Send to LLM for analysis diff --git a/src/utils/llm.rs b/src/utils/llm.rs index 905918c..7138346 100644 --- a/src/utils/llm.rs +++ b/src/utils/llm.rs @@ -1,20 +1,22 @@ use crate::{ error::ProcessingError, models::graph_entities::GraphMapper, + retrieval::vector::find_items_by_vector_similarity, storage::types::{ knowledge_entity::{KnowledgeEntity, KnowledgeEntityType}, knowledge_relationship::KnowledgeRelationship, + StoredObject, }, }; use async_openai::types::{ ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage, - CreateChatCompletionRequestArgs, CreateEmbeddingRequestArgs, + CreateChatCompletionRequestArgs, }; use serde::{Deserialize, Serialize}; use serde_json::json; use surrealdb::engine::remote::ws::Client; use surrealdb::Surreal; -use tracing::{debug, info}; +use tracing::debug; use uuid::Uuid; use super::embedding::generate_embedding; @@ -115,13 +117,12 @@ impl LLMGraphAnalysisResult { let target_db_id = mapper.get_or_parse_id(&llm_rel.target); debug!("IN: {}, OUT: {}", &source_db_id, &target_db_id); - Some(KnowledgeRelationship { - id: Uuid::new_v4().to_string(), - in_: source_db_id.to_string(), - out: target_db_id.to_string(), - 
relationship_type: llm_rel.type_.clone(), - metadata: None, - }) + Some(KnowledgeRelationship::new( + source_db_id.to_string(), + target_db_id.to_string(), + llm_rel.type_.to_owned(), + None, + )) }) .collect(); @@ -139,45 +140,34 @@ pub async fn create_json_ld( ) -> Result { // Format the input for more cohesive comparison let input_text = format!( - "content: {:?}, category: {:?}, user_instructions: {:?}", + "content: {}, category: {}, user_instructions: {}", text, category, instructions ); - // Generate embedding of the input - let input_embedding = generate_embedding(&openai_client, input_text).await?; + let closest_entities: Vec = find_items_by_vector_similarity( + 10, + input_text, + db_client, + KnowledgeEntity::table_name().to_string(), + openai_client, + ) + .await?; - let number_of_entities_to_get = 10; - - // Construct the query - let closest_query = format!("SELECT *, vector::distance::knn() AS distance FROM knowledge_entity WHERE embedding <|{},40|> {:?} ORDER BY distance",number_of_entities_to_get, input_embedding); - - // Perform query and deserialize to struct - let closest_entities: Vec = db_client.query(closest_query).await?.take(0)?; - #[allow(dead_code)] - #[derive(Debug)] - struct KnowledgeEntityToLLM { - id: String, - name: String, - description: String, - } - - info!( - "Number of KnowledgeEntities sent as context: {}", - closest_entities.len() - ); - - // Only keep most relevant information - let closest_entities_to_llm: Vec = closest_entities - .clone() - .into_iter() - .map(|entity| KnowledgeEntityToLLM { - id: entity.id, - name: entity.name, - description: entity.description, + // Format the KnowledgeEntity, remove redundant fields + let closest_entities_to_llm = json!(closest_entities + .iter() + .map(|entity| { + json!({ + "KnowledgeEntity": { + "id": entity.id, + "name": entity.name, + "description": entity.description + } + }) }) - .collect(); + .collect::>()); - debug!("{:?}", closest_entities_to_llm); + // let db_context = 
serde_json::to_string_pretty(&closest_entities_to_llm).unwrap(); let schema = json!({ "type": "object", @@ -268,10 +258,12 @@ pub async fn create_json_ld( "#; let user_message = format!( - "Category: {}\nInstructions: {}\nContent:\n{}\nExisting KnowledgeEntities in database:{:?}", + "Category:\n{}\nInstructions:\n{}\nContent:\n{}\nExisting KnowledgeEntities in database:\n{}", category, instructions, text, closest_entities_to_llm ); + debug!("{}", user_message); + // Build the chat completion request let request = CreateChatCompletionRequestArgs::default() .model("gpt-4o-mini")