creation of objects and relationships

This commit is contained in:
Per Stark
2024-10-23 12:38:56 +02:00
parent 8318b034c8
commit e508630acc
3 changed files with 79 additions and 64 deletions

View File

@@ -38,7 +38,7 @@ impl From<String> for KnowledgeEntityType {
/// Represents a relationship between two knowledge entities.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Relationship {
pub struct KnowledgeRelationship {
pub id: Uuid, // Generated in Rust
#[serde(rename = "in")]
pub in_: Uuid, // Target KnowledgeEntity ID
@@ -49,6 +49,7 @@ pub struct Relationship {
use std::collections::HashMap;
use crate::utils::llm::LLMGraphAnalysisResult;
use crate::utils::llm::LLMKnowledgeEntity;
use crate::utils::llm::LLMRelationship;
@@ -90,9 +91,9 @@ impl From<&LLMKnowledgeEntity> for KnowledgeEntity {
}
}
impl From<&LLMRelationship> for Relationship {
impl From<&LLMRelationship> for KnowledgeRelationship {
fn from(llm_rel: &LLMRelationship) -> Self {
Relationship {
KnowledgeRelationship {
id: Uuid::new_v4(),
in_: Uuid::nil(), // Placeholder; to be set after mapping
out: Uuid::nil(), // Placeholder; to be set after mapping

View File

@@ -4,6 +4,8 @@ use uuid::Uuid;
use crate::{models::file_info::FileInfo, utils::llm::create_json_ld};
use thiserror::Error;
use super::graph_entities::{KnowledgeEntity, KnowledgeRelationship};
/// Represents a single piece of text content extracted from various sources.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TextContent {
@@ -14,42 +16,6 @@ pub struct TextContent {
pub category: String,
}
/// Represents a generic knowledge entity in the graph.
///
/// The type derives `Serialize` and `Deserialize` with no field renames, so
/// the Rust field names are also the serialized keys.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct KnowledgeEntity {
/// Identifier of this entity.
pub id: Uuid,
/// Name of the entity.
pub name: String,
/// Free-text description of the entity.
pub description: String,
/// NOTE(review): presumably the ID of the source this entity was derived from — confirm against callers.
pub source_uuid: Uuid,
/// Kind of entity, held as an unconstrained string (no enum is enforced here).
pub entity_type: String,
}
/// A struct representing a knowledge source in the graph database.
///
/// Serializable and deserializable via serde; the field names are used
/// unchanged as the serialized keys.
#[derive(Deserialize, Debug, Serialize, Clone )]
pub struct KnowledgeSource {
/// Identifier of this knowledge source.
pub id: Uuid,
/// Title of the knowledge source.
pub title: String,
/// Free-text description of the knowledge source.
pub description: String,
/// Relationships attached to this source; each entry carries a type and a target.
pub relationships: Vec<Relationship>,
}
/// A struct representing a relationship between knowledge sources.
#[derive(Deserialize, Clone, Serialize, Debug)]
pub struct Relationship {
/// Relationship type. Serialized under the key `type`; the Rust field is
/// named `type_` because `type` is a reserved keyword.
#[serde(rename = "type")]
pub type_: String,
/// The other end of the relationship, as a string.
/// NOTE(review): whether this holds an ID or a title is not visible here — confirm.
pub target: String,
}
/// A struct representing the result of an LLM analysis.
///
/// Derives `Deserialize` and `Serialize` but, unlike the neighbouring types,
/// not `Clone`.
#[derive(Deserialize, Debug,Serialize)]
pub struct AnalysisResult {
/// The knowledge source described by the analysis, including its relationships.
pub knowledge_source: KnowledgeSource,
/// Category string carried with the analysis.
pub category: String,
/// Instructions string carried with the analysis.
pub instructions: String,
}
/// Error types for processing `TextContent`.
#[derive(Error, Debug)]
pub enum ProcessingError {
@@ -74,37 +40,39 @@ impl TextContent {
// Step 1: Send to LLM for analysis
let analysis = create_json_ld(&self.category, &self.instructions, &self.text).await?;
info!("{:#?}", &analysis);
// Step 2: Store analysis results in Graph DB
// client.store_knowledge_source(&analysis.knowledge_source).await?;
// Step 3: Store relationships in Graph DB
// for relationship in analysis.knowledge_source.relationships.iter() {
// client
// .store_relationship(&analysis.knowledge_source.id, relationship)
// .await?;
// }
// info!("{:#?}", &analysis);
// Step 3: Split text and store in Vector DB
// Step 2: Convert LLM analysis to database entities
let (entities, relationships) = analysis.to_database_entities();
// Step 3: Store in database
self.store_in_graph_db(entities, relationships).await?;
// Step 4: Split text and store in Vector DB
// self.store_in_vector_db().await?;
Ok(())
}
/// Stores analysis results in a graph database.
#[allow(dead_code)]
async fn store_in_graph_db(&self, _analysis: &AnalysisResult) -> Result<(), ProcessingError> {
// TODO: Implement storage logic for your specific graph database.
// Example:
/*
let graph_db = GraphDB::new("http://graph-db:8080");
graph_db.insert_analysis(analysis).await.map_err(|e| ProcessingError::GraphDBError(e.to_string()))?;
*/
unimplemented!()
async fn store_in_graph_db(
    &self,
    entities: Vec<KnowledgeEntity>,
    relationships: Vec<KnowledgeRelationship>
) -> Result<(), ProcessingError> {
    // Nothing is persisted yet: every entity, then every relationship, is
    // only written to the log at info level using its `Debug` representation.
    entities.iter().for_each(|entity| {
        info!("{:?}", entity);
    });
    relationships.iter().for_each(|relationship| {
        info!("{:?}", relationship);
    });

    // This implementation has no failure path, so the result is always `Ok`.
    Ok(())
}
/// Splits text and stores it in a vector database.
#[allow(dead_code)]
async fn store_in_vector_db(&self) -> Result<(), ProcessingError> {

View File

@@ -4,9 +4,13 @@ use async_openai::types::CreateChatCompletionRequestArgs;
use serde::Deserialize;
use serde::Serialize;
use tracing::debug;
use uuid::Uuid;
use crate::models::graph_entities::GraphMapper;
use crate::models::graph_entities::KnowledgeEntity;
use crate::models::graph_entities::KnowledgeEntityType;
use crate::models::graph_entities::KnowledgeRelationship;
use crate::models::text_content::ProcessingError;
use serde_json::json;
use crate::models::text_content::AnalysisResult;
/// Represents a single knowledge entity from the LLM.
#[derive(Debug, Serialize, Deserialize, Clone)]
@@ -33,6 +37,47 @@ pub struct LLMGraphAnalysisResult {
pub relationships: Vec<LLMRelationship>,
}
impl LLMGraphAnalysisResult {
    /// Converts the LLM output into graph entities and relationships.
    ///
    /// Returns `(entities, relationships)`:
    /// * one `KnowledgeEntity` per item in `knowledge_entities`, in the same
    ///   order, with the ID handed out by the `GraphMapper` for that item's
    ///   `key` and with `source_id` and `metadata` left as `None`;
    /// * one `KnowledgeRelationship` per item in `relationships` whose
    ///   `source` and `target` keys both resolve through the mapper. Items
    ///   with an unresolved key are skipped without an error.
    pub fn to_database_entities(&self) -> (Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>) {
        let mut mapper = GraphMapper::new();

        // Pass 1: build every entity, asking the mapper for an ID per LLM key.
        let mut entities: Vec<KnowledgeEntity> = Vec::new();
        for llm_entity in &self.knowledge_entities {
            let id = mapper.assign_id(&llm_entity.key);
            entities.push(KnowledgeEntity {
                id,
                name: llm_entity.name.clone(),
                description: llm_entity.description.clone(),
                entity_type: KnowledgeEntityType::from(llm_entity.entity_type.clone()),
                source_id: None,
                metadata: None,
            });
        }

        // Pass 2: build relationships from the IDs looked up in the mapper.
        // The target is only looked up once the source has resolved.
        let mut relationships: Vec<KnowledgeRelationship> = Vec::new();
        for llm_rel in &self.relationships {
            if let Some(source_id) = mapper.get_id(&llm_rel.source) {
                if let Some(target_id) = mapper.get_id(&llm_rel.target) {
                    relationships.push(KnowledgeRelationship {
                        id: Uuid::new_v4(),
                        out: *source_id,
                        in_: *target_id,
                        relationship_type: llm_rel.type_.clone(),
                        metadata: None,
                    });
                }
            }
        }

        (entities, relationships)
    }
}
/// Sends text to an LLM for analysis.
pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> Result<LLMGraphAnalysisResult, ProcessingError> {
let client = async_openai::Client::new();
@@ -117,8 +162,9 @@ pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> R
2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`.
3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.
4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity.
6. Optionally, add any relevant metadata within each object as needed."#;
5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity"
6. Only create relationships between existing KnowledgeEntities.
"#;
let user_message = format!(