working storing relationships

2026-07-07 05:15:08 +02:00 · 2024-11-06 21:24:30 +01:00
parent 6d8cd05c1a
commit 3f06bf969a
3 changed files with 120 additions and 115 deletions
@@ -51,6 +51,7 @@ impl From<String> for KnowledgeEntityType {
 /// Represents a relationship between two knowledge entities.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct KnowledgeRelationship {
    #[serde(deserialize_with = "thing_to_string")]
    pub id: String,
    #[serde(rename = "in")]
    pub in_: String, // Target KnowledgeEntity ID
@@ -84,13 +84,13 @@ impl TextContent {
        for relationship in relationships {
            // info!("{:?}", relationship);
-            // let _created: Option<Record> = db_client
+            let _created: Option<KnowledgeRelationship> = db_client
-            //     .client
+                .client
-            //     .insert(("knowledge_relationship", &relationship.id.to_string()))
+                .insert(("knowledge_relationship", &relationship.id.to_string()))
-            //     .content(relationship)
+                .content(relationship)
-            //     .await?;
+                .await?;
-            // debug!("{:?}",_created);
+            debug!("{:?}",_created);
        }
@@ -1,37 +1,31 @@
 use async_openai::types::ChatCompletionRequestSystemMessage;
 use async_openai::types::ChatCompletionRequestUserMessage;
 use async_openai::types::CreateChatCompletionRequestArgs;
 use serde::Deserialize;
 use serde::Serialize;
 use surrealdb::sql::Thing;
 use surrealdb::RecordId;
 use tracing::debug;
 use tracing::info;
 use uuid::Uuid;
 use crate::models::graph_entities::GraphMapper;
 use crate::models::graph_entities::KnowledgeEntity;
 use crate::models::graph_entities::KnowledgeEntityType;
 use crate::models::graph_entities::KnowledgeRelationship;
 use crate::models::text_content::ProcessingError;
 use crate::surrealdb::SurrealDbClient;
 use async_openai::types::{CreateChatCompletionRequestArgs, ChatCompletionRequestUserMessage, ChatCompletionRequestSystemMessage };
 use serde::{Deserialize, Serialize};
 use serde_json::json;
 use tracing::{info,debug};
 use uuid::Uuid;
 /// Represents a single knowledge entity from the LLM.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct LLMKnowledgeEntity {
-    pub key: String,               // Temporary identifier
+    pub key: String, // Temporary identifier
    pub name: String,
    pub description: String,
-    pub entity_type: String,       // Should match KnowledgeEntityType variants
+    pub entity_type: String, // Should match KnowledgeEntityType variants
 }
 /// Represents a single relationship from the LLM.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct LLMRelationship {
    #[serde(rename = "type")]
-    pub type_: String,              // e.g., RelatedTo, RelevantTo
+    pub type_: String, // e.g., RelatedTo, RelevantTo
-    pub source: String,             // Key of the source entity
+    pub source: String, // Key of the source entity
-    pub target: String,             // Key of the target entity
+    pub target: String, // Key of the target entity
 }
 /// Represents the entire graph analysis result from the LLM.
@@ -42,11 +36,15 @@ pub struct LLMGraphAnalysisResult {
 }
 impl LLMGraphAnalysisResult {
-    pub fn to_database_entities(&self, source_id: &Uuid) -> (Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>) {
+    pub fn to_database_entities(
        &self,
        source_id: &Uuid,
    ) -> (Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>) {
        let mut mapper = GraphMapper::new();
-        
+
        // First pass: Create all entities and map their keys to UUIDs
-        let entities: Vec<KnowledgeEntity> = self.knowledge_entities
+        let entities: Vec<KnowledgeEntity> = self
            .knowledge_entities
            .iter()
            .map(|llm_entity| {
                let id = mapper.assign_id(&llm_entity.key);
@@ -62,12 +60,13 @@ impl LLMGraphAnalysisResult {
            .collect();
        // Second pass: Create relationships using mapped UUIDs
-        let relationships: Vec<KnowledgeRelationship> = self.relationships
+        let relationships: Vec<KnowledgeRelationship> = self
            .relationships
            .iter()
            .filter_map(|llm_rel| {
                let source_id = mapper.get_id(&llm_rel.source)?;
                let target_id = mapper.get_id(&llm_rel.target)?;
-                
+
                Some(KnowledgeRelationship {
                    id: Uuid::new_v4().to_string(),
                    out: source_id.to_string(),
@@ -83,69 +82,72 @@ impl LLMGraphAnalysisResult {
 }
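 /// Sends the category, instructions and text to an LLM and parses the reply into an
 /// `LLMGraphAnalysisResult`; request, response and parse failures are returned as
 /// `ProcessingError::LLMError`.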
-pub async fn create_json_ld(category: &str, instructions: &str, text: &str, db_client: &SurrealDbClient) -> Result<LLMGraphAnalysisResult, ProcessingError> {
+pub async fn create_json_ld(
    category: &str,
    instructions: &str,
    text: &str,
    db_client: &SurrealDbClient,
 ) -> Result<LLMGraphAnalysisResult, ProcessingError> {
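    // Fetch every `knowledge_entity` record and log it, then delete them all from the table.
    // NOTE(review): the blanket delete looks like a development leftover; confirm it is intended.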
    let entities: Vec<KnowledgeEntity> = db_client.client.select("knowledge_entity").await?;
    info!("{:?}", entities);
    let deleted: Vec<KnowledgeEntity> = db_client.client.delete("knowledge_entity").await?;
-    info!{"{:?}", deleted};
+    info! {"{:?}", deleted};
-    
+    let client = async_openai::Client::new();
-    
+    let schema = json!({
-        let client = async_openai::Client::new();
+      "type": "object",
-        let schema = json!({
+      "properties": {
-          "type": "object",
+        "knowledge_entities": {
-          "properties": {
+          "type": "array",
-            "knowledge_entities": {
+          "items": {
-              "type": "array",
+            "type": "object",
-              "items": {  
+            "properties": {
-                "type": "object",
+              "key": { "type": "string" },
-                "properties": {
+              "name": { "type": "string" },
-                  "key": { "type": "string" },
+              "description": { "type": "string" },
-                  "name": { "type": "string" },
+              "entity_type": {
-                  "description": { "type": "string" },
+                "type": "string",
-                  "entity_type": { 
+                "enum": ["idea", "project", "document", "page", "textsnippet"]
                    "type": "string",
                    "enum": ["idea", "project", "document", "page", "textsnippet"]
                  }
                },
                "required": ["key", "name", "description", "entity_type"],
                "additionalProperties": false
              }
            },
-            "relationships": {
+            "required": ["key", "name", "description", "entity_type"],
-              "type": "array",
+            "additionalProperties": false
-              "items": {
+          }
-                "type": "object",
+        },
-                "properties": {
+        "relationships": {
-                  "type": { 
+          "type": "array",
-                    "type": "string", 
+          "items": {
-                    "enum": ["RelatedTo", "RelevantTo", "SimilarTo"] 
+            "type": "object",
-                  },
+            "properties": {
-                  "source": { "type": "string" },
+              "type": {
-                  "target": { "type": "string" }
+                "type": "string",
-                },
+                "enum": ["RelatedTo", "RelevantTo", "SimilarTo"]
-                "required": ["type", "source", "target"],
+              },
-                "additionalProperties": false
+              "source": { "type": "string" },
-              }
+              "target": { "type": "string" }
            }
          },
          "required": ["knowledge_entities", "relationships"],
          "additionalProperties": false
        });
        let response_format = async_openai::types::ResponseFormat::JsonSchema {
            json_schema: async_openai::types::ResponseFormatJsonSchema {
                description: Some("Structured analysis of the submitted content".into()),
                name: "content_analysis".into(),
                schema: Some(schema),
                strict: Some(true),
            },
-        };
+            "required": ["type", "source", "target"],
            "additionalProperties": false
          }
        }
      },
      "required": ["knowledge_entities", "relationships"],
      "additionalProperties": false
    });
-        // Construct the system and user messages
+    let response_format = async_openai::types::ResponseFormat::JsonSchema {
-        let system_message = r#"
+        json_schema: async_openai::types::ResponseFormatJsonSchema {
            description: Some("Structured analysis of the submitted content".into()),
            name: "content_analysis".into(),
            schema: Some(schema),
            strict: Some(true),
        },
    };
    // Construct the system and user messages
    let system_message = r#"
            You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database.
            The JSON should have the following structure:
@@ -177,46 +179,48 @@ pub async fn create_json_ld(category: &str, instructions: &str, text: &str, db_c
            4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
            5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity"
            6. Only create relationships between existing KnowledgeEntities.
-            "#; 
+            "#;
-        let user_message = format!(
+    let user_message = format!(
-            "Category: {}\nInstructions: {}\nContent:\n{}",
+        "Category: {}\nInstructions: {}\nContent:\n{}",
-            category, instructions, text
+        category, instructions, text
-        );
+    );
-        // Build the chat completion request
+    // Build the chat completion request
-        let request = CreateChatCompletionRequestArgs::default()
+    let request = CreateChatCompletionRequestArgs::default()
-            .model("gpt-4o-mini") 
+        .model("gpt-4o-mini")
-            .max_tokens(2048u32)
+        .max_tokens(2048u32)
-            .messages([
+        .messages([
-                ChatCompletionRequestSystemMessage::from(system_message).into(),
+            ChatCompletionRequestSystemMessage::from(system_message).into(),
-                ChatCompletionRequestUserMessage::from(user_message).into(),
+            ChatCompletionRequestUserMessage::from(user_message).into(),
-            ])
+        ])
-            .response_format(response_format)
+        .response_format(response_format)
-            .build().map_err(|e| ProcessingError::LLMError(e.to_string()))?;
+        .build()
        .map_err(|e| ProcessingError::LLMError(e.to_string()))?;
-        // Send the request to OpenAI
+    // Send the request to OpenAI
-        let response = client.chat().create(request).await.map_err(|e| {
+    let response = client
-            ProcessingError::LLMError(format!("OpenAI API request failed: {}", e))
+        .chat()
-        })?;
+        .create(request)
        .await
        .map_err(|e| ProcessingError::LLMError(format!("OpenAI API request failed: {}", e)))?;
-        debug!("{:?}", response);
+    debug!("{:?}", response);
-        // Extract and parse the response
+    // Extract and parse the response
-        for choice in response.choices {
+    for choice in response.choices {
-            if let Some(content) = choice.message.content {
+        if let Some(content) = choice.message.content {
-                let analysis: LLMGraphAnalysisResult = serde_json::from_str(&content).map_err(|e| {
+            let analysis: LLMGraphAnalysisResult = serde_json::from_str(&content).map_err(|e| {
-                    ProcessingError::LLMError(format!(
+                ProcessingError::LLMError(format!(
-                        "Failed to parse LLM response into analysis: {}",
+                    "Failed to parse LLM response into analysis: {}",
-                        e
+                    e
-                    ))
+                ))
-                })?;
+            })?;
-                return Ok(analysis);
+            return Ok(analysis);
            }
        }
        Err(ProcessingError::LLMError(
            "No content found in LLM response".into(),
        ))
    }
    Err(ProcessingError::LLMError(
        "No content found in LLM response".into(),
    ))
 }