mirror of
https://github.com/perstarkse/minne.git
synced 2026-04-23 17:28:34 +02:00
feat: creation of nodes and edges complete
This commit is contained in:
37
Cargo.lock
generated
37
Cargo.lock
generated
@@ -1561,9 +1561,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures"
|
name = "futures"
|
||||||
version = "0.3.30"
|
version = "0.3.31"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0"
|
checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
@@ -1576,9 +1576,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-channel"
|
name = "futures-channel"
|
||||||
version = "0.3.30"
|
version = "0.3.31"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78"
|
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-sink",
|
"futures-sink",
|
||||||
@@ -1586,15 +1586,15 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-core"
|
name = "futures-core"
|
||||||
version = "0.3.30"
|
version = "0.3.31"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
|
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-executor"
|
name = "futures-executor"
|
||||||
version = "0.3.30"
|
version = "0.3.31"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d"
|
checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-task",
|
"futures-task",
|
||||||
@@ -1603,9 +1603,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-io"
|
name = "futures-io"
|
||||||
version = "0.3.30"
|
version = "0.3.31"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
|
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-lite"
|
name = "futures-lite"
|
||||||
@@ -1637,9 +1637,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-macro"
|
name = "futures-macro"
|
||||||
version = "0.3.30"
|
version = "0.3.31"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
|
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@@ -1648,15 +1648,15 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-sink"
|
name = "futures-sink"
|
||||||
version = "0.3.30"
|
version = "0.3.31"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5"
|
checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-task"
|
name = "futures-task"
|
||||||
version = "0.3.30"
|
version = "0.3.31"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004"
|
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-timer"
|
name = "futures-timer"
|
||||||
@@ -1666,9 +1666,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-util"
|
name = "futures-util"
|
||||||
version = "0.3.30"
|
version = "0.3.31"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48"
|
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
@@ -5340,6 +5340,7 @@ dependencies = [
|
|||||||
"async-openai",
|
"async-openai",
|
||||||
"axum",
|
"axum",
|
||||||
"axum_typed_multipart",
|
"axum_typed_multipart",
|
||||||
|
"futures",
|
||||||
"futures-lite 2.3.0",
|
"futures-lite 2.3.0",
|
||||||
"lapin",
|
"lapin",
|
||||||
"mime",
|
"mime",
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ edition = "2021"
|
|||||||
async-openai = "0.24.1"
|
async-openai = "0.24.1"
|
||||||
axum = { version = "0.7.5", features = ["multipart", "macros"] }
|
axum = { version = "0.7.5", features = ["multipart", "macros"] }
|
||||||
axum_typed_multipart = "0.12.1"
|
axum_typed_multipart = "0.12.1"
|
||||||
|
futures = "0.3.31"
|
||||||
futures-lite = "2.3.0"
|
futures-lite = "2.3.0"
|
||||||
lapin = { version = "2.5.0", features = ["serde_json"] }
|
lapin = { version = "2.5.0", features = ["serde_json"] }
|
||||||
mime = "0.3.17"
|
mime = "0.3.17"
|
||||||
|
|||||||
@@ -79,6 +79,14 @@ impl GraphMapper {
|
|||||||
key_to_id: HashMap::new(),
|
key_to_id: HashMap::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/// Resolves `key` to a UUID.
///
/// If `key` itself parses as a UUID, that parsed value is returned directly.
/// Otherwise the UUID stored under `key` in `key_to_id` is returned.
///
/// # Panics
///
/// Panics if `key` is not a valid UUID string and has no entry in
/// `key_to_id`, because the lookup result is unwrapped.
|
||||||
|
pub fn get_or_parse_id(&mut self, key: &str) -> Uuid {
|
||||||
|
if let Ok(parsed_uuid) = Uuid::parse_str(key) {
|
||||||
|
parsed_uuid
|
||||||
|
} else {
|
||||||
|
// NOTE(review): unknown keys panic here. The `get_id(..)?` call this replaces in the
|
// relationship mapping could bail out via `?`; presumably it skipped unknown keys —
|
// confirm, and consider returning `Option<Uuid>` instead.
|
self.key_to_id.get(key).unwrap().clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Assigns a new UUID for a given key.
|
/// Assigns a new UUID for a given key.
|
||||||
pub fn assign_id(&mut self, key: &str) -> Uuid {
|
pub fn assign_id(&mut self, key: &str) -> Uuid {
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use async_openai::{error::OpenAIError, types::{CreateEmbeddingRequest, CreateEmbeddingRequestArgs}};
|
use async_openai::error::OpenAIError;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use surrealdb::{engine::remote::ws::Client, Surreal};
|
use surrealdb::{engine::remote::ws::Client, Surreal};
|
||||||
use tracing::{debug, info};
|
use tracing::{debug, info};
|
||||||
@@ -50,6 +50,15 @@ impl TextContent {
|
|||||||
// Store TextContent
|
// Store TextContent
|
||||||
let db_client = SurrealDbClient::new().await?;
|
let db_client = SurrealDbClient::new().await?;
|
||||||
|
|
||||||
|
// let deleted: Vec<KnowledgeEntity> = db_client.delete("knowledge_entity").await?;
|
||||||
|
// info! {"{:?} KnowledgeEntities deleted", deleted.len()};
|
||||||
|
|
||||||
|
// let relationships_deleted: Vec<KnowledgeRelationship> =
|
||||||
|
// db_client.delete("knowledge_relationship").await?;
|
||||||
|
// info!("{:?} Relationships deleted", relationships_deleted.len());
|
||||||
|
|
||||||
|
// panic!("STOP");
|
||||||
|
|
||||||
// db_client.query("REMOVE INDEX embeddings ON knowledge_entity").await?;
|
// db_client.query("REMOVE INDEX embeddings ON knowledge_entity").await?;
|
||||||
// db_client.query("DEFINE INDEX embeddings ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536").await?;
|
// db_client.query("DEFINE INDEX embeddings ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536").await?;
|
||||||
db_client.query("REBUILD INDEX IF EXISTS embeddings ON knowledge_entity").await?;
|
db_client.query("REBUILD INDEX IF EXISTS embeddings ON knowledge_entity").await?;
|
||||||
@@ -78,7 +87,7 @@ impl TextContent {
|
|||||||
db_client: &Surreal<Client>,
|
db_client: &Surreal<Client>,
|
||||||
) -> Result<(), ProcessingError> {
|
) -> Result<(), ProcessingError> {
|
||||||
for entity in &entities {
|
for entity in &entities {
|
||||||
// info!("{:?}", &entity);
|
info!("{:?}, {:?}, {:?}", &entity.id, &entity.name, &entity.description);
|
||||||
|
|
||||||
let _created: Option<KnowledgeEntity> = db_client
|
let _created: Option<KnowledgeEntity> = db_client
|
||||||
.create(("knowledge_entity", &entity.id.to_string()))
|
.create(("knowledge_entity", &entity.id.to_string()))
|
||||||
@@ -99,6 +108,18 @@ impl TextContent {
|
|||||||
debug!("{:?}",_created);
|
debug!("{:?}",_created);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for relationship in &relationships {
|
||||||
|
let in_entity: Option<KnowledgeEntity> = db_client.select(("knowledge_entity",relationship.in_.to_string())).await?;
|
||||||
|
let out_entity: Option<KnowledgeEntity> = db_client.select(("knowledge_entity", relationship.out.to_string())).await?;
|
||||||
|
|
||||||
|
if let (Some(in_), Some(out)) = (in_entity, out_entity) {
|
||||||
|
info!("{} - {} is {} to {} - {}", in_.id, in_.name, relationship.relationship_type, out.id, out.name);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
info!("No in or out entities found");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
info!("Inserted to database: {:?} entities, {:?} relationships", entities.len(), relationships.len());
|
info!("Inserted to database: {:?} entities, {:?} relationships", entities.len(), relationships.len());
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -1,21 +1,16 @@
|
|||||||
use core::panic;
|
|
||||||
|
|
||||||
use crate::models::graph_entities::{
|
use crate::models::graph_entities::{
|
||||||
GraphMapper, KnowledgeEntity, KnowledgeEntityType, KnowledgeRelationship,
|
GraphMapper, KnowledgeEntity, KnowledgeEntityType, KnowledgeRelationship,
|
||||||
};
|
};
|
||||||
use crate::models::text_content::ProcessingError;
|
use crate::models::text_content::ProcessingError;
|
||||||
use async_openai::types::{
|
use async_openai::types::{
|
||||||
ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage,
|
ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage,
|
||||||
CreateChatCompletionRequestArgs, CreateEmbeddingRequestArgs, Embedding,
|
CreateChatCompletionRequestArgs, CreateEmbeddingRequestArgs
|
||||||
};
|
};
|
||||||
use futures::future::try_join_all;
|
|
||||||
use futures::SinkExt;
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use surrealdb::engine::remote::ws::Client;
|
use surrealdb::engine::remote::ws::Client;
|
||||||
use surrealdb::Surreal;
|
use surrealdb::Surreal;
|
||||||
use tokio::try_join;
|
use tracing::debug;
|
||||||
use tracing::{debug, info};
|
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
/// Represents a single knowledge entity from the LLM.
|
/// Represents a single knowledge entity from the LLM.
|
||||||
@@ -135,13 +130,14 @@ impl LLMGraphAnalysisResult {
|
|||||||
.relationships
|
.relationships
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|llm_rel| {
|
.filter_map(|llm_rel| {
|
||||||
let source_db_id = mapper.get_id(&llm_rel.source)?;
|
let source_db_id = mapper.get_or_parse_id(&llm_rel.source);
|
||||||
let target_db_id = mapper.get_id(&llm_rel.target)?;
|
let target_db_id = mapper.get_or_parse_id(&llm_rel.target);
|
||||||
|
debug!("IN: {}, OUT: {}", &source_db_id, &target_db_id);
|
||||||
|
|
||||||
Some(KnowledgeRelationship {
|
Some(KnowledgeRelationship {
|
||||||
id: Uuid::new_v4().to_string(),
|
id: Uuid::new_v4().to_string(),
|
||||||
out: source_db_id.to_string(),
|
in_: source_db_id.to_string(),
|
||||||
in_: target_db_id.to_string(),
|
out: target_db_id.to_string(),
|
||||||
relationship_type: llm_rel.type_.clone(),
|
relationship_type: llm_rel.type_.clone(),
|
||||||
metadata: None,
|
metadata: None,
|
||||||
})
|
})
|
||||||
@@ -173,6 +169,7 @@ pub async fn create_json_ld(
|
|||||||
|
|
||||||
// Perform query and deserialize to struct
|
// Perform query and deserialize to struct
|
||||||
let closest_entities: Vec<KnowledgeEntity> = db_client.query(closest_query).await?.take(0)?;
|
let closest_entities: Vec<KnowledgeEntity> = db_client.query(closest_query).await?.take(0)?;
|
||||||
|
#[allow(dead_code)]
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct KnowledgeEntityToLLM {
|
struct KnowledgeEntityToLLM {
|
||||||
id: String,
|
id: String,
|
||||||
@@ -187,24 +184,8 @@ pub async fn create_json_ld(
|
|||||||
description: entity.description
|
description: entity.description
|
||||||
}).collect();
|
}).collect();
|
||||||
|
|
||||||
info!("{:?}", closest_entities_to_llm);
|
debug!("{:?}", closest_entities_to_llm);
|
||||||
|
|
||||||
for entity in closest_entities_to_llm {
|
|
||||||
info!("{:?}, {:?}", entity.name, entity.description);
|
|
||||||
}
|
|
||||||
// info!("Closest entities: {:?}", closest_entities);
|
|
||||||
panic!("Quitting");
|
|
||||||
|
|
||||||
let deleted: Vec<KnowledgeEntity> = db_client.delete("knowledge_entity").await?;
|
|
||||||
info! {"{:?} KnowledgeEntities deleted", deleted.len()};
|
|
||||||
|
|
||||||
// let relationships: Vec<KnowledgeRelationship> =
|
|
||||||
// db_client.select("knowledge_relationship").await?;
|
|
||||||
// info!("{:?} Relationships defined", relationships.len());
|
|
||||||
|
|
||||||
let relationships_deleted: Vec<KnowledgeRelationship> =
|
|
||||||
db_client.delete("knowledge_relationship").await?;
|
|
||||||
info!("{:?} Relationships deleted", relationships_deleted.len());
|
|
||||||
|
|
||||||
let schema = json!({
|
let schema = json!({
|
||||||
"type": "object",
|
"type": "object",
|
||||||
@@ -258,7 +239,7 @@ pub async fn create_json_ld(
|
|||||||
|
|
||||||
// Construct the system and user messages
|
// Construct the system and user messages
|
||||||
let system_message = r#"
|
let system_message = r#"
|
||||||
You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database.
|
You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities, do not replicate these!
|
||||||
|
|
||||||
The JSON should have the following structure:
|
The JSON should have the following structure:
|
||||||
|
|
||||||
@@ -275,8 +256,8 @@ pub async fn create_json_ld(
|
|||||||
"relationships": [
|
"relationships": [
|
||||||
{
|
{
|
||||||
"type": "RelationshipType",
|
"type": "RelationshipType",
|
||||||
"source": "unique-key-1",
|
"source": "unique-key-1 or UUID from existing database",
|
||||||
"target": "unique-key-2"
|
"target": "unique-key-1 or UUID from existing database"
|
||||||
},
|
},
|
||||||
// More relationships...
|
// More relationships...
|
||||||
]
|
]
|
||||||
@@ -288,12 +269,13 @@ pub async fn create_json_ld(
|
|||||||
3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.
|
3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.
|
||||||
4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
|
4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
|
||||||
5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity"
|
5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity"
|
||||||
|
6. You will be presented with a few existing KnowledgeEntities that are similar to the current ones. They will have an existing UUID. When creating relationships to these entities, use their UUID.
|
||||||
7. Only create relationships between existing KnowledgeEntities.
|
7. Only create relationships between existing KnowledgeEntities.
|
||||||
"#;
|
"#;
|
||||||
|
|
||||||
let user_message = format!(
|
let user_message = format!(
|
||||||
"Category: {}\nInstructions: {}\nContent:\n{}",
|
"Category: {}\nInstructions: {}\nContent:\n{}\nExisting KnowledgeEntities:{:?}",
|
||||||
category, instructions, text
|
category, instructions, text, closest_entities_to_llm
|
||||||
);
|
);
|
||||||
|
|
||||||
// Build the chat completion request
|
// Build the chat completion request
|
||||||
|
|||||||
Reference in New Issue
Block a user