Get relationship storage working

2026-03-23 18:01:26 +01:00 · 2024-11-06 21:24:30 +01:00
parent 6d8cd05c1a
commit 3f06bf969a
3 changed files with 120 additions and 115 deletions
--- a/src/models/graph_entities.rs
+++ b/src/models/graph_entities.rs
@@ -51,6 +51,7 @@ impl From<String> for KnowledgeEntityType {
 /// Represents a relationship between two knowledge entities.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct KnowledgeRelationship {
+    #[serde(deserialize_with = "thing_to_string")]
    pub id: String,
    #[serde(rename = "in")]
    pub in_: String, // Target KnowledgeEntity ID
--- a/src/models/text_content.rs
+++ b/src/models/text_content.rs
@@ -84,13 +84,13 @@ impl TextContent {
        for relationship in relationships {
            // info!("{:?}", relationship);

-            // let _created: Option<Record> = db_client
-            //     .client
-            //     .insert(("knowledge_relationship", &relationship.id.to_string()))
-            //     .content(relationship)
-            //     .await?;
+            let _created: Option<KnowledgeRelationship> = db_client
+                .client
+                .insert(("knowledge_relationship", &relationship.id.to_string()))
+                .content(relationship)
+                .await?;

-            // debug!("{:?}",_created);
+            debug!("{:?}",_created);
        
        }

--- a/src/utils/llm.rs
+++ b/src/utils/llm.rs
@@ -1,37 +1,31 @@
-use async_openai::types::ChatCompletionRequestSystemMessage;
-use async_openai::types::ChatCompletionRequestUserMessage;
-use async_openai::types::CreateChatCompletionRequestArgs;
-use serde::Deserialize;
-use serde::Serialize;
-use surrealdb::sql::Thing;
-use surrealdb::RecordId;
-use tracing::debug;
-use tracing::info;
-use uuid::Uuid;
 use crate::models::graph_entities::GraphMapper;
 use crate::models::graph_entities::KnowledgeEntity;
 use crate::models::graph_entities::KnowledgeEntityType;
 use crate::models::graph_entities::KnowledgeRelationship;
 use crate::models::text_content::ProcessingError;
 use crate::surrealdb::SurrealDbClient;
+use async_openai::types::{CreateChatCompletionRequestArgs, ChatCompletionRequestUserMessage, ChatCompletionRequestSystemMessage };
+use serde::{Deserialize, Serialize};
 use serde_json::json;
+use tracing::{info,debug};
+use uuid::Uuid;

 /// Represents a single knowledge entity from the LLM.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct LLMKnowledgeEntity {
-    pub key: String,               // Temporary identifier
+    pub key: String, // Temporary identifier
    pub name: String,
    pub description: String,
-    pub entity_type: String,       // Should match KnowledgeEntityType variants
+    pub entity_type: String, // Should match KnowledgeEntityType variants
 }

 /// Represents a single relationship from the LLM.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct LLMRelationship {
    #[serde(rename = "type")]
-    pub type_: String,              // e.g., RelatedTo, RelevantTo
-    pub source: String,             // Key of the source entity
-    pub target: String,             // Key of the target entity
+    pub type_: String, // e.g., RelatedTo, RelevantTo
+    pub source: String, // Key of the source entity
+    pub target: String, // Key of the target entity
 }

 /// Represents the entire graph analysis result from the LLM.
@@ -42,11 +36,15 @@ pub struct LLMGraphAnalysisResult {
 }

 impl LLMGraphAnalysisResult {
-    pub fn to_database_entities(&self, source_id: &Uuid) -> (Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>) {
+    pub fn to_database_entities(
+        &self,
+        source_id: &Uuid,
+    ) -> (Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>) {
        let mut mapper = GraphMapper::new();
-        
+
        // First pass: Create all entities and map their keys to UUIDs
-        let entities: Vec<KnowledgeEntity> = self.knowledge_entities
+        let entities: Vec<KnowledgeEntity> = self
+            .knowledge_entities
            .iter()
            .map(|llm_entity| {
                let id = mapper.assign_id(&llm_entity.key);
@@ -62,12 +60,13 @@ impl LLMGraphAnalysisResult {
            .collect();

        // Second pass: Create relationships using mapped UUIDs
-        let relationships: Vec<KnowledgeRelationship> = self.relationships
+        let relationships: Vec<KnowledgeRelationship> = self
+            .relationships
            .iter()
            .filter_map(|llm_rel| {
                let source_id = mapper.get_id(&llm_rel.source)?;
                let target_id = mapper.get_id(&llm_rel.target)?;
-                
+
                Some(KnowledgeRelationship {
                    id: Uuid::new_v4().to_string(),
                    out: source_id.to_string(),
@@ -83,69 +82,72 @@ impl LLMGraphAnalysisResult {
 }

 /// Sends text to an LLM for analysis.
-pub async fn create_json_ld(category: &str, instructions: &str, text: &str, db_client: &SurrealDbClient) -> Result<LLMGraphAnalysisResult, ProcessingError> {
+pub async fn create_json_ld(
+    category: &str,
+    instructions: &str,
+    text: &str,
+    db_client: &SurrealDbClient,
+) -> Result<LLMGraphAnalysisResult, ProcessingError> {
    // Get the nodes from the database
    let entities: Vec<KnowledgeEntity> = db_client.client.select("knowledge_entity").await?;
    info!("{:?}", entities);

    let deleted: Vec<KnowledgeEntity> = db_client.client.delete("knowledge_entity").await?;
-    info!{"{:?}", deleted};
+    info! {"{:?}", deleted};

-    
-    
-        let client = async_openai::Client::new();
-        let schema = json!({
-          "type": "object",
-          "properties": {
-            "knowledge_entities": {
-              "type": "array",
-              "items": {  
-                "type": "object",
-                "properties": {
-                  "key": { "type": "string" },
-                  "name": { "type": "string" },
-                  "description": { "type": "string" },
-                  "entity_type": { 
-                    "type": "string",
-                    "enum": ["idea", "project", "document", "page", "textsnippet"]
-                  }
-                },
-                "required": ["key", "name", "description", "entity_type"],
-                "additionalProperties": false
+    let client = async_openai::Client::new();
+    let schema = json!({
+      "type": "object",
+      "properties": {
+        "knowledge_entities": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "key": { "type": "string" },
+              "name": { "type": "string" },
+              "description": { "type": "string" },
+              "entity_type": {
+                "type": "string",
+                "enum": ["idea", "project", "document", "page", "textsnippet"]
              }
            },
-            "relationships": {
-              "type": "array",
-              "items": {
-                "type": "object",
-                "properties": {
-                  "type": { 
-                    "type": "string", 
-                    "enum": ["RelatedTo", "RelevantTo", "SimilarTo"] 
-                  },
-                  "source": { "type": "string" },
-                  "target": { "type": "string" }
-                },
-                "required": ["type", "source", "target"],
-                "additionalProperties": false
-              }
-            }
-          },
-          "required": ["knowledge_entities", "relationships"],
-          "additionalProperties": false
-        });
-
-        let response_format = async_openai::types::ResponseFormat::JsonSchema {
-            json_schema: async_openai::types::ResponseFormatJsonSchema {
-                description: Some("Structured analysis of the submitted content".into()),
-                name: "content_analysis".into(),
-                schema: Some(schema),
-                strict: Some(true),
+            "required": ["key", "name", "description", "entity_type"],
+            "additionalProperties": false
+          }
+        },
+        "relationships": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "type": {
+                "type": "string",
+                "enum": ["RelatedTo", "RelevantTo", "SimilarTo"]
+              },
+              "source": { "type": "string" },
+              "target": { "type": "string" }
            },
-        };
+            "required": ["type", "source", "target"],
+            "additionalProperties": false
+          }
+        }
+      },
+      "required": ["knowledge_entities", "relationships"],
+      "additionalProperties": false
+    });

-        // Construct the system and user messages
-        let system_message = r#"
+    let response_format = async_openai::types::ResponseFormat::JsonSchema {
+        json_schema: async_openai::types::ResponseFormatJsonSchema {
+            description: Some("Structured analysis of the submitted content".into()),
+            name: "content_analysis".into(),
+            schema: Some(schema),
+            strict: Some(true),
+        },
+    };
+
+    // Construct the system and user messages
+    let system_message = r#"
            You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database.
            
            The JSON should have the following structure:
@@ -177,46 +179,48 @@ pub async fn create_json_ld(category: &str, instructions: &str, text: &str, db_c
            4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
            5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity"
            6. Only create relationships between existing KnowledgeEntities.
-            "#; 
-           
+            "#;

-        let user_message = format!(
-            "Category: {}\nInstructions: {}\nContent:\n{}",
-            category, instructions, text
-        );
+    let user_message = format!(
+        "Category: {}\nInstructions: {}\nContent:\n{}",
+        category, instructions, text
+    );

-        // Build the chat completion request
-        let request = CreateChatCompletionRequestArgs::default()
-            .model("gpt-4o-mini") 
-            .max_tokens(2048u32)
-            .messages([
-                ChatCompletionRequestSystemMessage::from(system_message).into(),
-                ChatCompletionRequestUserMessage::from(user_message).into(),
-            ])
-            .response_format(response_format)
-            .build().map_err(|e| ProcessingError::LLMError(e.to_string()))?;
+    // Build the chat completion request
+    let request = CreateChatCompletionRequestArgs::default()
+        .model("gpt-4o-mini")
+        .max_tokens(2048u32)
+        .messages([
+            ChatCompletionRequestSystemMessage::from(system_message).into(),
+            ChatCompletionRequestUserMessage::from(user_message).into(),
+        ])
+        .response_format(response_format)
+        .build()
+        .map_err(|e| ProcessingError::LLMError(e.to_string()))?;

-        // Send the request to OpenAI
-        let response = client.chat().create(request).await.map_err(|e| {
-            ProcessingError::LLMError(format!("OpenAI API request failed: {}", e))
-        })?;
+    // Send the request to OpenAI
+    let response = client
+        .chat()
+        .create(request)
+        .await
+        .map_err(|e| ProcessingError::LLMError(format!("OpenAI API request failed: {}", e)))?;

-        debug!("{:?}", response);
+    debug!("{:?}", response);

-        // Extract and parse the response
-        for choice in response.choices {
-            if let Some(content) = choice.message.content {
-                let analysis: LLMGraphAnalysisResult = serde_json::from_str(&content).map_err(|e| {
-                    ProcessingError::LLMError(format!(
-                        "Failed to parse LLM response into analysis: {}",
-                        e
-                    ))
-                })?;
-                return Ok(analysis);
-            }
+    // Extract and parse the response
+    for choice in response.choices {
+        if let Some(content) = choice.message.content {
+            let analysis: LLMGraphAnalysisResult = serde_json::from_str(&content).map_err(|e| {
+                ProcessingError::LLMError(format!(
+                    "Failed to parse LLM response into analysis: {}",
+                    e
+                ))
+            })?;
+            return Ok(analysis);
        }
-
-        Err(ProcessingError::LLMError(
-            "No content found in LLM response".into(),
-        ))
    }
+
+    Err(ProcessingError::LLMError(
+        "No content found in LLM response".into(),
+    ))
+}