working storing relationships

This commit is contained in:
Per Stark
2024-11-06 21:24:30 +01:00
parent 6d8cd05c1a
commit 3f06bf969a
3 changed files with 120 additions and 115 deletions

View File

@@ -51,6 +51,7 @@ impl From<String> for KnowledgeEntityType {
/// Represents a relationship between two knowledge entities. /// Represents a relationship between two knowledge entities.
#[derive(Debug, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize, Clone)]
pub struct KnowledgeRelationship { pub struct KnowledgeRelationship {
#[serde(deserialize_with = "thing_to_string")]
pub id: String, pub id: String,
#[serde(rename = "in")] #[serde(rename = "in")]
pub in_: String, // Target KnowledgeEntity ID pub in_: String, // Target KnowledgeEntity ID

View File

@@ -84,13 +84,13 @@ impl TextContent {
for relationship in relationships { for relationship in relationships {
// info!("{:?}", relationship); // info!("{:?}", relationship);
// let _created: Option<Record> = db_client let _created: Option<KnowledgeRelationship> = db_client
// .client .client
// .insert(("knowledge_relationship", &relationship.id.to_string())) .insert(("knowledge_relationship", &relationship.id.to_string()))
// .content(relationship) .content(relationship)
// .await?; .await?;
// debug!("{:?}",_created); debug!("{:?}",_created);
} }

View File

@@ -1,37 +1,31 @@
use async_openai::types::ChatCompletionRequestSystemMessage;
use async_openai::types::ChatCompletionRequestUserMessage;
use async_openai::types::CreateChatCompletionRequestArgs;
use serde::Deserialize;
use serde::Serialize;
use surrealdb::sql::Thing;
use surrealdb::RecordId;
use tracing::debug;
use tracing::info;
use uuid::Uuid;
use crate::models::graph_entities::GraphMapper; use crate::models::graph_entities::GraphMapper;
use crate::models::graph_entities::KnowledgeEntity; use crate::models::graph_entities::KnowledgeEntity;
use crate::models::graph_entities::KnowledgeEntityType; use crate::models::graph_entities::KnowledgeEntityType;
use crate::models::graph_entities::KnowledgeRelationship; use crate::models::graph_entities::KnowledgeRelationship;
use crate::models::text_content::ProcessingError; use crate::models::text_content::ProcessingError;
use crate::surrealdb::SurrealDbClient; use crate::surrealdb::SurrealDbClient;
use async_openai::types::{CreateChatCompletionRequestArgs, ChatCompletionRequestUserMessage, ChatCompletionRequestSystemMessage };
use serde::{Deserialize, Serialize};
use serde_json::json; use serde_json::json;
use tracing::{info,debug};
use uuid::Uuid;
/// Represents a single knowledge entity from the LLM. /// Represents a single knowledge entity from the LLM.
#[derive(Debug, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LLMKnowledgeEntity { pub struct LLMKnowledgeEntity {
pub key: String, // Temporary identifier pub key: String, // Temporary identifier
pub name: String, pub name: String,
pub description: String, pub description: String,
pub entity_type: String, // Should match KnowledgeEntityType variants pub entity_type: String, // Should match KnowledgeEntityType variants
} }
/// Represents a single relationship from the LLM. /// Represents a single relationship from the LLM.
#[derive(Debug, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LLMRelationship { pub struct LLMRelationship {
#[serde(rename = "type")] #[serde(rename = "type")]
pub type_: String, // e.g., RelatedTo, RelevantTo pub type_: String, // e.g., RelatedTo, RelevantTo
pub source: String, // Key of the source entity pub source: String, // Key of the source entity
pub target: String, // Key of the target entity pub target: String, // Key of the target entity
} }
/// Represents the entire graph analysis result from the LLM. /// Represents the entire graph analysis result from the LLM.
@@ -42,11 +36,15 @@ pub struct LLMGraphAnalysisResult {
} }
impl LLMGraphAnalysisResult { impl LLMGraphAnalysisResult {
pub fn to_database_entities(&self, source_id: &Uuid) -> (Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>) { pub fn to_database_entities(
&self,
source_id: &Uuid,
) -> (Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>) {
let mut mapper = GraphMapper::new(); let mut mapper = GraphMapper::new();
// First pass: Create all entities and map their keys to UUIDs // First pass: Create all entities and map their keys to UUIDs
let entities: Vec<KnowledgeEntity> = self.knowledge_entities let entities: Vec<KnowledgeEntity> = self
.knowledge_entities
.iter() .iter()
.map(|llm_entity| { .map(|llm_entity| {
let id = mapper.assign_id(&llm_entity.key); let id = mapper.assign_id(&llm_entity.key);
@@ -62,12 +60,13 @@ impl LLMGraphAnalysisResult {
.collect(); .collect();
// Second pass: Create relationships using mapped UUIDs // Second pass: Create relationships using mapped UUIDs
let relationships: Vec<KnowledgeRelationship> = self.relationships let relationships: Vec<KnowledgeRelationship> = self
.relationships
.iter() .iter()
.filter_map(|llm_rel| { .filter_map(|llm_rel| {
let source_id = mapper.get_id(&llm_rel.source)?; let source_id = mapper.get_id(&llm_rel.source)?;
let target_id = mapper.get_id(&llm_rel.target)?; let target_id = mapper.get_id(&llm_rel.target)?;
Some(KnowledgeRelationship { Some(KnowledgeRelationship {
id: Uuid::new_v4().to_string(), id: Uuid::new_v4().to_string(),
out: source_id.to_string(), out: source_id.to_string(),
@@ -83,69 +82,72 @@ impl LLMGraphAnalysisResult {
} }
/// Sends text to an LLM for analysis. /// Sends text to an LLM for analysis.
pub async fn create_json_ld(category: &str, instructions: &str, text: &str, db_client: &SurrealDbClient) -> Result<LLMGraphAnalysisResult, ProcessingError> { pub async fn create_json_ld(
category: &str,
instructions: &str,
text: &str,
db_client: &SurrealDbClient,
) -> Result<LLMGraphAnalysisResult, ProcessingError> {
// Get the nodes from the database // Get the nodes from the database
let entities: Vec<KnowledgeEntity> = db_client.client.select("knowledge_entity").await?; let entities: Vec<KnowledgeEntity> = db_client.client.select("knowledge_entity").await?;
info!("{:?}", entities); info!("{:?}", entities);
let deleted: Vec<KnowledgeEntity> = db_client.client.delete("knowledge_entity").await?; let deleted: Vec<KnowledgeEntity> = db_client.client.delete("knowledge_entity").await?;
info!{"{:?}", deleted}; info! {"{:?}", deleted};
let client = async_openai::Client::new();
let schema = json!({
let client = async_openai::Client::new(); "type": "object",
let schema = json!({ "properties": {
"type": "object", "knowledge_entities": {
"properties": { "type": "array",
"knowledge_entities": { "items": {
"type": "array", "type": "object",
"items": { "properties": {
"type": "object", "key": { "type": "string" },
"properties": { "name": { "type": "string" },
"key": { "type": "string" }, "description": { "type": "string" },
"name": { "type": "string" }, "entity_type": {
"description": { "type": "string" }, "type": "string",
"entity_type": { "enum": ["idea", "project", "document", "page", "textsnippet"]
"type": "string",
"enum": ["idea", "project", "document", "page", "textsnippet"]
}
},
"required": ["key", "name", "description", "entity_type"],
"additionalProperties": false
} }
}, },
"relationships": { "required": ["key", "name", "description", "entity_type"],
"type": "array", "additionalProperties": false
"items": { }
"type": "object", },
"properties": { "relationships": {
"type": { "type": "array",
"type": "string", "items": {
"enum": ["RelatedTo", "RelevantTo", "SimilarTo"] "type": "object",
}, "properties": {
"source": { "type": "string" }, "type": {
"target": { "type": "string" } "type": "string",
}, "enum": ["RelatedTo", "RelevantTo", "SimilarTo"]
"required": ["type", "source", "target"], },
"additionalProperties": false "source": { "type": "string" },
} "target": { "type": "string" }
}
},
"required": ["knowledge_entities", "relationships"],
"additionalProperties": false
});
let response_format = async_openai::types::ResponseFormat::JsonSchema {
json_schema: async_openai::types::ResponseFormatJsonSchema {
description: Some("Structured analysis of the submitted content".into()),
name: "content_analysis".into(),
schema: Some(schema),
strict: Some(true),
}, },
}; "required": ["type", "source", "target"],
"additionalProperties": false
}
}
},
"required": ["knowledge_entities", "relationships"],
"additionalProperties": false
});
// Construct the system and user messages let response_format = async_openai::types::ResponseFormat::JsonSchema {
let system_message = r#" json_schema: async_openai::types::ResponseFormatJsonSchema {
description: Some("Structured analysis of the submitted content".into()),
name: "content_analysis".into(),
schema: Some(schema),
strict: Some(true),
},
};
// Construct the system and user messages
let system_message = r#"
You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database.
The JSON should have the following structure: The JSON should have the following structure:
@@ -177,46 +179,48 @@ pub async fn create_json_ld(category: &str, instructions: &str, text: &str, db_c
4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo. 4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity" 5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity"
6. Only create relationships between existing KnowledgeEntities. 6. Only create relationships between existing KnowledgeEntities.
"#; "#;
let user_message = format!( let user_message = format!(
"Category: {}\nInstructions: {}\nContent:\n{}", "Category: {}\nInstructions: {}\nContent:\n{}",
category, instructions, text category, instructions, text
); );
// Build the chat completion request // Build the chat completion request
let request = CreateChatCompletionRequestArgs::default() let request = CreateChatCompletionRequestArgs::default()
.model("gpt-4o-mini") .model("gpt-4o-mini")
.max_tokens(2048u32) .max_tokens(2048u32)
.messages([ .messages([
ChatCompletionRequestSystemMessage::from(system_message).into(), ChatCompletionRequestSystemMessage::from(system_message).into(),
ChatCompletionRequestUserMessage::from(user_message).into(), ChatCompletionRequestUserMessage::from(user_message).into(),
]) ])
.response_format(response_format) .response_format(response_format)
.build().map_err(|e| ProcessingError::LLMError(e.to_string()))?; .build()
.map_err(|e| ProcessingError::LLMError(e.to_string()))?;
// Send the request to OpenAI // Send the request to OpenAI
let response = client.chat().create(request).await.map_err(|e| { let response = client
ProcessingError::LLMError(format!("OpenAI API request failed: {}", e)) .chat()
})?; .create(request)
.await
.map_err(|e| ProcessingError::LLMError(format!("OpenAI API request failed: {}", e)))?;
debug!("{:?}", response); debug!("{:?}", response);
// Extract and parse the response // Extract and parse the response
for choice in response.choices { for choice in response.choices {
if let Some(content) = choice.message.content { if let Some(content) = choice.message.content {
let analysis: LLMGraphAnalysisResult = serde_json::from_str(&content).map_err(|e| { let analysis: LLMGraphAnalysisResult = serde_json::from_str(&content).map_err(|e| {
ProcessingError::LLMError(format!( ProcessingError::LLMError(format!(
"Failed to parse LLM response into analysis: {}", "Failed to parse LLM response into analysis: {}",
e e
)) ))
})?; })?;
return Ok(analysis); return Ok(analysis);
}
} }
Err(ProcessingError::LLMError(
"No content found in LLM response".into(),
))
} }
Err(ProcessingError::LLMError(
"No content found in LLM response".into(),
))
}