diff --git a/src/models/graph_entities.rs b/src/models/graph_entities.rs index f450a21..77e159f 100644 --- a/src/models/graph_entities.rs +++ b/src/models/graph_entities.rs @@ -51,6 +51,7 @@ impl From<Thing> for KnowledgeEntityType { /// Represents a relationship between two knowledge entities. #[derive(Debug, Serialize, Deserialize, Clone)] pub struct KnowledgeRelationship { + #[serde(deserialize_with = "thing_to_string")] pub id: String, #[serde(rename = "in")] pub in_: String, // Target KnowledgeEntity ID diff --git a/src/models/text_content.rs b/src/models/text_content.rs index df10669..6bd2772 100644 --- a/src/models/text_content.rs +++ b/src/models/text_content.rs @@ -84,13 +84,13 @@ impl TextContent { for relationship in relationships { // info!("{:?}", relationship); - // let _created: Option<KnowledgeRelationship> = db_client - // .client - // .insert(("knowledge_relationship", &relationship.id.to_string())) - // .content(relationship) - // .await?; + let _created: Option<KnowledgeRelationship> = db_client + .client + .insert(("knowledge_relationship", &relationship.id.to_string())) + .content(relationship) + .await?; - // debug!("{:?}",_created); + debug!("{:?}",_created); } diff --git a/src/utils/llm.rs b/src/utils/llm.rs index 0c8491b..f0a6617 100644 --- a/src/utils/llm.rs +++ b/src/utils/llm.rs @@ -1,37 +1,31 @@ -use async_openai::types::ChatCompletionRequestSystemMessage; -use async_openai::types::ChatCompletionRequestUserMessage; -use async_openai::types::CreateChatCompletionRequestArgs; -use serde::Deserialize; -use serde::Serialize; -use surrealdb::sql::Thing; -use surrealdb::RecordId; -use tracing::debug; -use tracing::info; -use uuid::Uuid; use crate::models::graph_entities::GraphMapper; use crate::models::graph_entities::KnowledgeEntity; use crate::models::graph_entities::KnowledgeEntityType; use crate::models::graph_entities::KnowledgeRelationship; use crate::models::text_content::ProcessingError; use crate::surrealdb::SurrealDbClient; +use async_openai::types::{CreateChatCompletionRequestArgs, 
ChatCompletionRequestUserMessage, ChatCompletionRequestSystemMessage }; +use serde::{Deserialize, Serialize}; use serde_json::json; +use tracing::{info,debug}; +use uuid::Uuid; /// Represents a single knowledge entity from the LLM. #[derive(Debug, Serialize, Deserialize, Clone)] pub struct LLMKnowledgeEntity { - pub key: String, // Temporary identifier + pub key: String, // Temporary identifier pub name: String, pub description: String, - pub entity_type: String, // Should match KnowledgeEntityType variants + pub entity_type: String, // Should match KnowledgeEntityType variants } /// Represents a single relationship from the LLM. #[derive(Debug, Serialize, Deserialize, Clone)] pub struct LLMRelationship { #[serde(rename = "type")] - pub type_: String, // e.g., RelatedTo, RelevantTo - pub source: String, // Key of the source entity - pub target: String, // Key of the target entity + pub type_: String, // e.g., RelatedTo, RelevantTo + pub source: String, // Key of the source entity + pub target: String, // Key of the target entity } /// Represents the entire graph analysis result from the LLM. 
@@ -42,11 +36,15 @@ pub struct LLMGraphAnalysisResult { } impl LLMGraphAnalysisResult { - pub fn to_database_entities(&self, source_id: &Uuid) -> (Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>) { + pub fn to_database_entities( + &self, + source_id: &Uuid, + ) -> (Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>) { let mut mapper = GraphMapper::new(); - + // First pass: Create all entities and map their keys to UUIDs - let entities: Vec<KnowledgeEntity> = self.knowledge_entities + let entities: Vec<KnowledgeEntity> = self + .knowledge_entities .iter() .map(|llm_entity| { let id = mapper.assign_id(&llm_entity.key); @@ -62,12 +60,13 @@ impl LLMGraphAnalysisResult { .collect(); // Second pass: Create relationships using mapped UUIDs - let relationships: Vec<KnowledgeRelationship> = self.relationships + let relationships: Vec<KnowledgeRelationship> = self + .relationships .iter() .filter_map(|llm_rel| { let source_id = mapper.get_id(&llm_rel.source)?; let target_id = mapper.get_id(&llm_rel.target)?; - + Some(KnowledgeRelationship { id: Uuid::new_v4().to_string(), out: source_id.to_string(), @@ -83,69 +82,72 @@ impl LLMGraphAnalysisResult { } /// Sends text to an LLM for analysis. -pub async fn create_json_ld(category: &str, instructions: &str, text: &str, db_client: &SurrealDbClient) -> Result<LLMGraphAnalysisResult, ProcessingError> { +pub async fn create_json_ld( + category: &str, + instructions: &str, + text: &str, + db_client: &SurrealDbClient, +) -> Result<LLMGraphAnalysisResult, ProcessingError> { // Get the nodes from the database let entities: Vec<KnowledgeEntity> = db_client.client.select("knowledge_entity").await?; info!("{:?}", entities); let deleted: Vec<KnowledgeEntity> = db_client.client.delete("knowledge_entity").await?; - info!{"{:?}", deleted}; + info! 
{"{:?}", deleted}; - - - let client = async_openai::Client::new(); - let schema = json!({ - "type": "object", - "properties": { - "knowledge_entities": { - "type": "array", - "items": { - "type": "object", - "properties": { - "key": { "type": "string" }, - "name": { "type": "string" }, - "description": { "type": "string" }, - "entity_type": { - "type": "string", - "enum": ["idea", "project", "document", "page", "textsnippet"] - } - }, - "required": ["key", "name", "description", "entity_type"], - "additionalProperties": false + let client = async_openai::Client::new(); + let schema = json!({ + "type": "object", + "properties": { + "knowledge_entities": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { "type": "string" }, + "name": { "type": "string" }, + "description": { "type": "string" }, + "entity_type": { + "type": "string", + "enum": ["idea", "project", "document", "page", "textsnippet"] } }, - "relationships": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": ["RelatedTo", "RelevantTo", "SimilarTo"] - }, - "source": { "type": "string" }, - "target": { "type": "string" } - }, - "required": ["type", "source", "target"], - "additionalProperties": false - } - } - }, - "required": ["knowledge_entities", "relationships"], - "additionalProperties": false - }); - - let response_format = async_openai::types::ResponseFormat::JsonSchema { - json_schema: async_openai::types::ResponseFormatJsonSchema { - description: Some("Structured analysis of the submitted content".into()), - name: "content_analysis".into(), - schema: Some(schema), - strict: Some(true), + "required": ["key", "name", "description", "entity_type"], + "additionalProperties": false + } + }, + "relationships": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["RelatedTo", "RelevantTo", "SimilarTo"] + }, + "source": { "type": "string" }, + "target": 
{ "type": "string" } }, - }; + "required": ["type", "source", "target"], + "additionalProperties": false + } + } + }, + "required": ["knowledge_entities", "relationships"], + "additionalProperties": false + }); - // Construct the system and user messages - let system_message = r#" + let response_format = async_openai::types::ResponseFormat::JsonSchema { + json_schema: async_openai::types::ResponseFormatJsonSchema { + description: Some("Structured analysis of the submitted content".into()), + name: "content_analysis".into(), + schema: Some(schema), + strict: Some(true), + }, + }; + + // Construct the system and user messages + let system_message = r#" You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. The JSON should have the following structure: @@ -177,46 +179,48 @@ pub async fn create_json_ld(category: &str, instructions: &str, text: &str, db_c 4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo. 5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity" 6. Only create relationships between existing KnowledgeEntities. 
- "#; - + "#; - let user_message = format!( - "Category: {}\nInstructions: {}\nContent:\n{}", - category, instructions, text - ); + let user_message = format!( + "Category: {}\nInstructions: {}\nContent:\n{}", + category, instructions, text + ); - // Build the chat completion request - let request = CreateChatCompletionRequestArgs::default() - .model("gpt-4o-mini") - .max_tokens(2048u32) - .messages([ - ChatCompletionRequestSystemMessage::from(system_message).into(), - ChatCompletionRequestUserMessage::from(user_message).into(), - ]) - .response_format(response_format) - .build().map_err(|e| ProcessingError::LLMError(e.to_string()))?; + // Build the chat completion request + let request = CreateChatCompletionRequestArgs::default() + .model("gpt-4o-mini") + .max_tokens(2048u32) + .messages([ + ChatCompletionRequestSystemMessage::from(system_message).into(), + ChatCompletionRequestUserMessage::from(user_message).into(), + ]) + .response_format(response_format) + .build() + .map_err(|e| ProcessingError::LLMError(e.to_string()))?; - // Send the request to OpenAI - let response = client.chat().create(request).await.map_err(|e| { - ProcessingError::LLMError(format!("OpenAI API request failed: {}", e)) - })?; + // Send the request to OpenAI + let response = client + .chat() + .create(request) + .await + .map_err(|e| ProcessingError::LLMError(format!("OpenAI API request failed: {}", e)))?; - debug!("{:?}", response); + debug!("{:?}", response); - // Extract and parse the response - for choice in response.choices { - if let Some(content) = choice.message.content { - let analysis: LLMGraphAnalysisResult = serde_json::from_str(&content).map_err(|e| { - ProcessingError::LLMError(format!( - "Failed to parse LLM response into analysis: {}", - e - )) - })?; - return Ok(analysis); - } + // Extract and parse the response + for choice in response.choices { + if let Some(content) = choice.message.content { + let analysis: LLMGraphAnalysisResult = 
serde_json::from_str(&content).map_err(|e| { + ProcessingError::LLMError(format!( + "Failed to parse LLM response into analysis: {}", + e + )) + })?; + return Ok(analysis); } - - Err(ProcessingError::LLMError( - "No content found in LLM response".into(), - )) } + + Err(ProcessingError::LLMError( + "No content found in LLM response".into(), + )) +}