diff --git a/src/models/graph_entities.rs b/src/models/graph_entities.rs index d815ea2..903f222 100644 --- a/src/models/graph_entities.rs +++ b/src/models/graph_entities.rs @@ -38,7 +38,7 @@ impl From for KnowledgeEntityType { /// Represents a relationship between two knowledge entities. #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Relationship { +pub struct KnowledgeRelationship { pub id: Uuid, // Generated in Rust #[serde(rename = "in")] pub in_: Uuid, // Target KnowledgeEntity ID @@ -49,6 +49,7 @@ pub struct Relationship { use std::collections::HashMap; +use crate::utils::llm::LLMGraphAnalysisResult; use crate::utils::llm::LLMKnowledgeEntity; use crate::utils::llm::LLMRelationship; @@ -90,9 +91,9 @@ impl From<&LLMKnowledgeEntity> for KnowledgeEntity { } } -impl From<&LLMRelationship> for Relationship { +impl From<&LLMRelationship> for KnowledgeRelationship { fn from(llm_rel: &LLMRelationship) -> Self { - Relationship { + KnowledgeRelationship { id: Uuid::new_v4(), in_: Uuid::nil(), // Placeholder; to be set after mapping out: Uuid::nil(), // Placeholder; to be set after mapping diff --git a/src/models/text_content.rs b/src/models/text_content.rs index c233408..4cfabc9 100644 --- a/src/models/text_content.rs +++ b/src/models/text_content.rs @@ -4,6 +4,8 @@ use uuid::Uuid; use crate::{models::file_info::FileInfo, utils::llm::create_json_ld}; use thiserror::Error; +use super::graph_entities::{KnowledgeEntity, KnowledgeRelationship}; + /// Represents a single piece of text content extracted from various sources. #[derive(Debug, Serialize, Deserialize, Clone)] pub struct TextContent { @@ -14,42 +16,6 @@ pub struct TextContent { pub category: String, } -/// Represents a generic knowledge entity in the graph. -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct KnowledgeEntity { - pub id: Uuid, - pub name: String, - pub description: String, - pub source_uuid: Uuid, - pub entity_type: String, -} - -/// A struct representing a knowledge source in the graph database. -#[derive(Deserialize, Debug, Serialize, Clone )] -pub struct KnowledgeSource { - pub id: Uuid, - pub title: String, - pub description: String, - pub relationships: Vec, -} - -/// A struct representing a relationship between knowledge sources. -#[derive(Deserialize, Clone, Serialize, Debug)] -pub struct Relationship { - #[serde(rename = "type")] - pub type_: String, - pub target: String, -} - -/// A struct representing the result of an LLM analysis. -#[derive(Deserialize, Debug,Serialize)] -pub struct AnalysisResult { - pub knowledge_source: KnowledgeSource, - pub category: String, - pub instructions: String, -} - - /// Error types for processing `TextContent`. #[derive(Error, Debug)] pub enum ProcessingError { @@ -74,37 +40,39 @@ impl TextContent { // Step 1: Send to LLM for analysis let analysis = create_json_ld(&self.category, &self.instructions, &self.text).await?; - info!("{:#?}", &analysis); - - // Step 2: Store analysis results in Graph DB - // client.store_knowledge_source(&analysis.knowledge_source).await?; - - // Step 3: Store relationships in Graph DB - // for relationship in analysis.knowledge_source.relationships.iter() { - // client - // .store_relationship(&analysis.knowledge_source.id, relationship) - // .await?; - // } + // info!("{:#?}", &analysis); - // Step 3: Split text and store in Vector DB + // Step 2: Convert LLM analysis to database entities + let (entities, relationships) = analysis.to_database_entities(); + + // Step 3: Store in database + self.store_in_graph_db(entities, relationships).await?; + + + // Step 4: Split text and store in Vector DB // self.store_in_vector_db().await?; Ok(()) } - /// Stores analysis results in a graph database. - #[allow(dead_code)] - async fn store_in_graph_db(&self, _analysis: &AnalysisResult) -> Result<(), ProcessingError> { - // TODO: Implement storage logic for your specific graph database. - // Example: - /* - let graph_db = GraphDB::new("http://graph-db:8080"); - graph_db.insert_analysis(analysis).await.map_err(|e| ProcessingError::GraphDBError(e.to_string()))?; - */ - unimplemented!() + async fn store_in_graph_db( + &self, + entities: Vec, + relationships: Vec + ) -> Result<(), ProcessingError> { + for entity in entities { + info!("{:?}", entity); + } + + for relationship in relationships { + info!("{:?}", relationship); + } + + Ok(()) } + /// Splits text and stores it in a vector database. #[allow(dead_code)] async fn store_in_vector_db(&self) -> Result<(), ProcessingError> { diff --git a/src/utils/llm.rs b/src/utils/llm.rs index 261746b..cacd9f4 100644 --- a/src/utils/llm.rs +++ b/src/utils/llm.rs @@ -4,9 +4,13 @@ use async_openai::types::CreateChatCompletionRequestArgs; use serde::Deserialize; use serde::Serialize; use tracing::debug; +use uuid::Uuid; +use crate::models::graph_entities::GraphMapper; +use crate::models::graph_entities::KnowledgeEntity; +use crate::models::graph_entities::KnowledgeEntityType; +use crate::models::graph_entities::KnowledgeRelationship; use crate::models::text_content::ProcessingError; use serde_json::json; -use crate::models::text_content::AnalysisResult; /// Represents a single knowledge entity from the LLM. #[derive(Debug, Serialize, Deserialize, Clone)] @@ -33,6 +37,47 @@ pub struct LLMGraphAnalysisResult { pub relationships: Vec, } +impl LLMGraphAnalysisResult { + pub fn to_database_entities(&self) -> (Vec, Vec) { + let mut mapper = GraphMapper::new(); + + // First pass: Create all entities and map their keys to UUIDs + let entities: Vec = self.knowledge_entities + .iter() + .map(|llm_entity| { + let id = mapper.assign_id(&llm_entity.key); + KnowledgeEntity { + id, + name: llm_entity.name.clone(), + description: llm_entity.description.clone(), + entity_type: KnowledgeEntityType::from(llm_entity.entity_type.clone()), + source_id: None, + metadata: None, + } + }) + .collect(); + + // Second pass: Create relationships using mapped UUIDs + let relationships: Vec = self.relationships + .iter() + .filter_map(|llm_rel| { + let source_id = mapper.get_id(&llm_rel.source)?; + let target_id = mapper.get_id(&llm_rel.target)?; + + Some(KnowledgeRelationship { + id: Uuid::new_v4(), + out: *source_id, + in_: *target_id, + relationship_type: llm_rel.type_.clone(), + metadata: None, + }) + }) + .collect(); + + (entities, relationships) + } +} + /// Sends text to an LLM for analysis. pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> Result { let client = async_openai::Client::new(); @@ -117,8 +162,9 @@ pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> R 2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`. 3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet. 4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo. - 5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity. - 6. Optionally, add any relevant metadata within each object as needed."#; + 5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity" + 6. Only create relationships between existing KnowledgeEntities. + "#; let user_message = format!(