feat: creation of nodes and edges complete

This commit is contained in:
Per Stark
2024-11-18 11:24:55 +01:00
parent d81825c786
commit 0b874b427a
5 changed files with 66 additions and 53 deletions

37
Cargo.lock generated
View File

@@ -1561,9 +1561,9 @@ dependencies = [
[[package]] [[package]]
name = "futures" name = "futures"
version = "0.3.30" version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
dependencies = [ dependencies = [
"futures-channel", "futures-channel",
"futures-core", "futures-core",
@@ -1576,9 +1576,9 @@ dependencies = [
[[package]] [[package]]
name = "futures-channel" name = "futures-channel"
version = "0.3.30" version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
dependencies = [ dependencies = [
"futures-core", "futures-core",
"futures-sink", "futures-sink",
@@ -1586,15 +1586,15 @@ dependencies = [
[[package]] [[package]]
name = "futures-core" name = "futures-core"
version = "0.3.30" version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
[[package]] [[package]]
name = "futures-executor" name = "futures-executor"
version = "0.3.30" version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
dependencies = [ dependencies = [
"futures-core", "futures-core",
"futures-task", "futures-task",
@@ -1603,9 +1603,9 @@ dependencies = [
[[package]] [[package]]
name = "futures-io" name = "futures-io"
version = "0.3.30" version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
[[package]] [[package]]
name = "futures-lite" name = "futures-lite"
@@ -1637,9 +1637,9 @@ dependencies = [
[[package]] [[package]]
name = "futures-macro" name = "futures-macro"
version = "0.3.30" version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -1648,15 +1648,15 @@ dependencies = [
[[package]] [[package]]
name = "futures-sink" name = "futures-sink"
version = "0.3.30" version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
[[package]] [[package]]
name = "futures-task" name = "futures-task"
version = "0.3.30" version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]] [[package]]
name = "futures-timer" name = "futures-timer"
@@ -1666,9 +1666,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
[[package]] [[package]]
name = "futures-util" name = "futures-util"
version = "0.3.30" version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
dependencies = [ dependencies = [
"futures-channel", "futures-channel",
"futures-core", "futures-core",
@@ -5340,6 +5340,7 @@ dependencies = [
"async-openai", "async-openai",
"axum", "axum",
"axum_typed_multipart", "axum_typed_multipart",
"futures",
"futures-lite 2.3.0", "futures-lite 2.3.0",
"lapin", "lapin",
"mime", "mime",

View File

@@ -7,6 +7,7 @@ edition = "2021"
async-openai = "0.24.1" async-openai = "0.24.1"
axum = { version = "0.7.5", features = ["multipart", "macros"] } axum = { version = "0.7.5", features = ["multipart", "macros"] }
axum_typed_multipart = "0.12.1" axum_typed_multipart = "0.12.1"
futures = "0.3.31"
futures-lite = "2.3.0" futures-lite = "2.3.0"
lapin = { version = "2.5.0", features = ["serde_json"] } lapin = { version = "2.5.0", features = ["serde_json"] }
mime = "0.3.17" mime = "0.3.17"

View File

@@ -79,6 +79,14 @@ impl GraphMapper {
key_to_id: HashMap::new(), key_to_id: HashMap::new(),
} }
} }
/// Resolves `key` to a UUID.
///
/// If `key` is itself a valid UUID string, it is parsed and returned
/// directly. Otherwise the UUID stored for `key` in `key_to_id` is returned.
///
/// # Panics
///
/// Panics if `key` is not a valid UUID string and has no entry in
/// `key_to_id`.
pub fn get_or_parse_id(&mut self, key: &str) -> Uuid {
    Uuid::parse_str(key).unwrap_or_else(|_| {
        // `Uuid` is `Copy`, so the stored value is dereferenced rather than cloned.
        // The panic message names the offending key so a bad reference is traceable.
        *self
            .key_to_id
            .get(key)
            .unwrap_or_else(|| panic!("no UUID assigned for key `{}`", key))
    })
}
/// Assigns a new UUID for a given key. /// Assigns a new UUID for a given key.
pub fn assign_id(&mut self, key: &str) -> Uuid { pub fn assign_id(&mut self, key: &str) -> Uuid {

View File

@@ -1,4 +1,4 @@
use async_openai::{error::OpenAIError, types::{CreateEmbeddingRequest, CreateEmbeddingRequestArgs}}; use async_openai::error::OpenAIError;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use surrealdb::{engine::remote::ws::Client, Surreal}; use surrealdb::{engine::remote::ws::Client, Surreal};
use tracing::{debug, info}; use tracing::{debug, info};
@@ -50,6 +50,15 @@ impl TextContent {
// Store TextContent // Store TextContent
let db_client = SurrealDbClient::new().await?; let db_client = SurrealDbClient::new().await?;
// let deleted: Vec<KnowledgeEntity> = db_client.delete("knowledge_entity").await?;
// info! {"{:?} KnowledgeEntities deleted", deleted.len()};
// let relationships_deleted: Vec<KnowledgeRelationship> =
// db_client.delete("knowledge_relationship").await?;
// info!("{:?} Relationships deleted", relationships_deleted.len());
// panic!("STOP");
// db_client.query("REMOVE INDEX embeddings ON knowledge_entity").await?; // db_client.query("REMOVE INDEX embeddings ON knowledge_entity").await?;
// db_client.query("DEFINE INDEX embeddings ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536").await?; // db_client.query("DEFINE INDEX embeddings ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536").await?;
db_client.query("REBUILD INDEX IF EXISTS embeddings ON knowledge_entity").await?; db_client.query("REBUILD INDEX IF EXISTS embeddings ON knowledge_entity").await?;
@@ -78,7 +87,7 @@ impl TextContent {
db_client: &Surreal<Client>, db_client: &Surreal<Client>,
) -> Result<(), ProcessingError> { ) -> Result<(), ProcessingError> {
for entity in &entities { for entity in &entities {
// info!("{:?}", &entity); info!("{:?}, {:?}, {:?}", &entity.id, &entity.name, &entity.description);
let _created: Option<KnowledgeEntity> = db_client let _created: Option<KnowledgeEntity> = db_client
.create(("knowledge_entity", &entity.id.to_string())) .create(("knowledge_entity", &entity.id.to_string()))
@@ -99,6 +108,18 @@ impl TextContent {
debug!("{:?}",_created); debug!("{:?}",_created);
} }
for relationship in &relationships {
let in_entity: Option<KnowledgeEntity> = db_client.select(("knowledge_entity",relationship.in_.to_string())).await?;
let out_entity: Option<KnowledgeEntity> = db_client.select(("knowledge_entity", relationship.out.to_string())).await?;
if let (Some(in_), Some(out)) = (in_entity, out_entity) {
info!("{} - {} is {} to {} - {}", in_.id, in_.name, relationship.relationship_type, out.id, out.name);
}
else {
info!("No in or out entities found");
}
}
info!("Inserted to database: {:?} entities, {:?} relationships", entities.len(), relationships.len()); info!("Inserted to database: {:?} entities, {:?} relationships", entities.len(), relationships.len());
Ok(()) Ok(())

View File

@@ -1,21 +1,16 @@
use core::panic;
use crate::models::graph_entities::{ use crate::models::graph_entities::{
GraphMapper, KnowledgeEntity, KnowledgeEntityType, KnowledgeRelationship, GraphMapper, KnowledgeEntity, KnowledgeEntityType, KnowledgeRelationship,
}; };
use crate::models::text_content::ProcessingError; use crate::models::text_content::ProcessingError;
use async_openai::types::{ use async_openai::types::{
ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage, ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage,
CreateChatCompletionRequestArgs, CreateEmbeddingRequestArgs, Embedding, CreateChatCompletionRequestArgs, CreateEmbeddingRequestArgs
}; };
use futures::future::try_join_all;
use futures::SinkExt;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::json; use serde_json::json;
use surrealdb::engine::remote::ws::Client; use surrealdb::engine::remote::ws::Client;
use surrealdb::Surreal; use surrealdb::Surreal;
use tokio::try_join; use tracing::debug;
use tracing::{debug, info};
use uuid::Uuid; use uuid::Uuid;
/// Represents a single knowledge entity from the LLM. /// Represents a single knowledge entity from the LLM.
@@ -135,13 +130,14 @@ impl LLMGraphAnalysisResult {
.relationships .relationships
.iter() .iter()
.filter_map(|llm_rel| { .filter_map(|llm_rel| {
let source_db_id = mapper.get_id(&llm_rel.source)?; let source_db_id = mapper.get_or_parse_id(&llm_rel.source);
let target_db_id = mapper.get_id(&llm_rel.target)?; let target_db_id = mapper.get_or_parse_id(&llm_rel.target);
debug!("IN: {}, OUT: {}", &source_db_id, &target_db_id);
Some(KnowledgeRelationship { Some(KnowledgeRelationship {
id: Uuid::new_v4().to_string(), id: Uuid::new_v4().to_string(),
out: source_db_id.to_string(), in_: source_db_id.to_string(),
in_: target_db_id.to_string(), out: target_db_id.to_string(),
relationship_type: llm_rel.type_.clone(), relationship_type: llm_rel.type_.clone(),
metadata: None, metadata: None,
}) })
@@ -173,6 +169,7 @@ pub async fn create_json_ld(
// Perform query and deserialize to struct // Perform query and deserialize to struct
let closest_entities: Vec<KnowledgeEntity> = db_client.query(closest_query).await?.take(0)?; let closest_entities: Vec<KnowledgeEntity> = db_client.query(closest_query).await?.take(0)?;
#[allow(dead_code)]
#[derive(Debug)] #[derive(Debug)]
struct KnowledgeEntityToLLM { struct KnowledgeEntityToLLM {
id: String, id: String,
@@ -187,24 +184,8 @@ pub async fn create_json_ld(
description: entity.description description: entity.description
}).collect(); }).collect();
info!("{:?}", closest_entities_to_llm); debug!("{:?}", closest_entities_to_llm);
for entity in closest_entities_to_llm {
info!("{:?}, {:?}", entity.name, entity.description);
}
// info!("Closest entities: {:?}", closest_entities);
panic!("Quitting");
let deleted: Vec<KnowledgeEntity> = db_client.delete("knowledge_entity").await?;
info! {"{:?} KnowledgeEntities deleted", deleted.len()};
// let relationships: Vec<KnowledgeRelationship> =
// db_client.select("knowledge_relationship").await?;
// info!("{:?} Relationships defined", relationships.len());
let relationships_deleted: Vec<KnowledgeRelationship> =
db_client.delete("knowledge_relationship").await?;
info!("{:?} Relationships deleted", relationships_deleted.len());
let schema = json!({ let schema = json!({
"type": "object", "type": "object",
@@ -258,7 +239,7 @@ pub async fn create_json_ld(
// Construct the system and user messages // Construct the system and user messages
let system_message = r#" let system_message = r#"
You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities, do not replicate these!
The JSON should have the following structure: The JSON should have the following structure:
@@ -275,8 +256,8 @@ pub async fn create_json_ld(
"relationships": [ "relationships": [
{ {
"type": "RelationshipType", "type": "RelationshipType",
"source": "unique-key-1", "source": "unique-key-1 or UUID from existing database",
"target": "unique-key-2" "target": "unique-key-1 or UUID from existing database"
}, },
// More relationships... // More relationships...
] ]
@@ -288,12 +269,13 @@ pub async fn create_json_ld(
3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet. 3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.
4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo. 4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity" 5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity"
6. You will be presented with a few existing KnowledgeEntities that are similar to the current ones. They will have an existing UUID. When creating relationships to these entities, use their UUID.
7. Only create relationships between existing KnowledgeEntities. 7. Only create relationships between existing KnowledgeEntities.
"#; "#;
let user_message = format!( let user_message = format!(
"Category: {}\nInstructions: {}\nContent:\n{}", "Category: {}\nInstructions: {}\nContent:\n{}\nExisting KnowledgeEntities:{:?}",
category, instructions, text category, instructions, text, closest_entities_to_llm
); );
// Build the chat completion request // Build the chat completion request