mirror of
https://github.com/perstarkse/minne.git
synced 2026-04-25 10:18:38 +02:00
refactoring: completed storage, now using new fn to construct
This commit is contained in:
@@ -1,66 +1,6 @@
|
|||||||
use serde::Deserialize;
|
|
||||||
use serde::Deserializer;
|
|
||||||
use serde::Serialize;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use surrealdb::sql::Thing;
|
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
/// Represents a generic knowledge entity in the graph.
|
|
||||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
|
||||||
pub struct KnowledgeEntity {
|
|
||||||
#[serde(deserialize_with = "thing_to_string")]
|
|
||||||
pub id: String,
|
|
||||||
pub name: String,
|
|
||||||
pub description: String,
|
|
||||||
pub entity_type: KnowledgeEntityType,
|
|
||||||
pub source_id: String,
|
|
||||||
pub metadata: Option<serde_json::Value>,
|
|
||||||
pub embedding: Option<Vec<f32>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn thing_to_string<'de, D>(deserializer: D) -> Result<String, D::Error>
|
|
||||||
where
|
|
||||||
D: Deserializer<'de>,
|
|
||||||
{
|
|
||||||
let thing = Thing::deserialize(deserializer)?;
|
|
||||||
Ok(thing.id.to_raw())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
|
||||||
pub enum KnowledgeEntityType {
|
|
||||||
Idea,
|
|
||||||
Project,
|
|
||||||
Document,
|
|
||||||
Page,
|
|
||||||
TextSnippet,
|
|
||||||
// Add more types as needed
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<String> for KnowledgeEntityType {
|
|
||||||
fn from(s: String) -> Self {
|
|
||||||
match s.to_lowercase().as_str() {
|
|
||||||
"idea" => KnowledgeEntityType::Idea,
|
|
||||||
"project" => KnowledgeEntityType::Project,
|
|
||||||
"document" => KnowledgeEntityType::Document,
|
|
||||||
"page" => KnowledgeEntityType::Page,
|
|
||||||
"textsnippet" => KnowledgeEntityType::TextSnippet,
|
|
||||||
_ => KnowledgeEntityType::Document, // Default case
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Represents a relationship between two knowledge entities.
|
|
||||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
|
||||||
pub struct KnowledgeRelationship {
|
|
||||||
#[serde(deserialize_with = "thing_to_string")]
|
|
||||||
pub id: String,
|
|
||||||
#[serde(rename = "in")]
|
|
||||||
pub in_: String, // Target KnowledgeEntity ID
|
|
||||||
pub out: String, // Source KnowledgeEntity ID
|
|
||||||
pub relationship_type: String, // e.g., RelatedTo, RelevantTo
|
|
||||||
pub metadata: Option<serde_json::Value>, // Additional metadata
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Intermediate struct to hold mapping between LLM keys and generated IDs.
|
/// Intermediate struct to hold mapping between LLM keys and generated IDs.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct GraphMapper {
|
pub struct GraphMapper {
|
||||||
|
|||||||
@@ -40,43 +40,35 @@ impl IngressObject {
|
|||||||
category,
|
category,
|
||||||
} => {
|
} => {
|
||||||
let text = Self::fetch_text_from_url(url).await?;
|
let text = Self::fetch_text_from_url(url).await?;
|
||||||
let id = Uuid::new_v4();
|
Ok(TextContent::new(
|
||||||
Ok(TextContent {
|
|
||||||
id: id.to_string(),
|
|
||||||
text,
|
text,
|
||||||
instructions: instructions.clone(),
|
instructions.into(),
|
||||||
category: category.clone(),
|
category.into(),
|
||||||
file_info: None,
|
None,
|
||||||
})
|
))
|
||||||
}
|
}
|
||||||
IngressObject::Text {
|
IngressObject::Text {
|
||||||
text,
|
text,
|
||||||
instructions,
|
instructions,
|
||||||
category,
|
category,
|
||||||
} => {
|
} => Ok(TextContent::new(
|
||||||
let id = Uuid::new_v4();
|
text.into(),
|
||||||
Ok(TextContent {
|
instructions.into(),
|
||||||
id: id.to_string(),
|
category.into(),
|
||||||
text: text.clone(),
|
None,
|
||||||
instructions: instructions.clone(),
|
)),
|
||||||
category: category.clone(),
|
|
||||||
file_info: None,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
IngressObject::File {
|
IngressObject::File {
|
||||||
file_info,
|
file_info,
|
||||||
instructions,
|
instructions,
|
||||||
category,
|
category,
|
||||||
} => {
|
} => {
|
||||||
let id = Uuid::new_v4();
|
|
||||||
let text = Self::extract_text_from_file(file_info).await?;
|
let text = Self::extract_text_from_file(file_info).await?;
|
||||||
Ok(TextContent {
|
Ok(TextContent::new(
|
||||||
id: id.to_string(),
|
|
||||||
text,
|
text,
|
||||||
instructions: instructions.clone(),
|
instructions.into(),
|
||||||
category: category.clone(),
|
category.into(),
|
||||||
file_info: Some(file_info.clone()),
|
Some(file_info.to_owned()),
|
||||||
})
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
use crate::storage;
|
|
||||||
use crate::storage::db::store_item;
|
use crate::storage::db::store_item;
|
||||||
|
use crate::storage::types::knowledge_entity::KnowledgeEntity;
|
||||||
|
use crate::storage::types::knowledge_relationship::KnowledgeRelationship;
|
||||||
use crate::storage::types::text_chunk::TextChunk;
|
use crate::storage::types::text_chunk::TextChunk;
|
||||||
use crate::storage::types::text_content::TextContent;
|
use crate::storage::types::text_content::TextContent;
|
||||||
use crate::{
|
use crate::{
|
||||||
@@ -10,29 +11,6 @@ use crate::{
|
|||||||
use surrealdb::{engine::remote::ws::Client, Surreal};
|
use surrealdb::{engine::remote::ws::Client, Surreal};
|
||||||
use text_splitter::TextSplitter;
|
use text_splitter::TextSplitter;
|
||||||
use tracing::{debug, info};
|
use tracing::{debug, info};
|
||||||
use uuid::Uuid;
|
|
||||||
|
|
||||||
use super::graph_entities::{KnowledgeEntity, KnowledgeRelationship};
|
|
||||||
|
|
||||||
// #[derive(Serialize, Deserialize, Debug)]
|
|
||||||
// struct TextChunk {
|
|
||||||
// #[serde(deserialize_with = "thing_to_string")]
|
|
||||||
// id: String,
|
|
||||||
// source_id: String,
|
|
||||||
// chunk: String,
|
|
||||||
// embedding: Vec<f32>,
|
|
||||||
// }
|
|
||||||
|
|
||||||
/// Represents a single piece of text content extracted from various sources.
|
|
||||||
// #[derive(Debug, Serialize, Deserialize, Clone)]
|
|
||||||
// pub struct TextContent {
|
|
||||||
// #[serde(deserialize_with = "thing_to_string")]
|
|
||||||
// pub id: String,
|
|
||||||
// pub text: String,
|
|
||||||
// pub file_info: Option<FileInfo>,
|
|
||||||
// pub instructions: String,
|
|
||||||
// pub category: String,
|
|
||||||
// }
|
|
||||||
|
|
||||||
async fn vector_comparison<T>(
|
async fn vector_comparison<T>(
|
||||||
take: u8,
|
take: u8,
|
||||||
@@ -70,14 +48,14 @@ async fn get_related_nodes(
|
|||||||
impl TextContent {
|
impl TextContent {
|
||||||
/// Processes the `TextContent` by sending it to an LLM, storing in a graph DB, and vector DB.
|
/// Processes the `TextContent` by sending it to an LLM, storing in a graph DB, and vector DB.
|
||||||
pub async fn process(&self) -> Result<(), ProcessingError> {
|
pub async fn process(&self) -> Result<(), ProcessingError> {
|
||||||
// Store TextContent
|
|
||||||
let db_client = SurrealDbClient::new().await?;
|
let db_client = SurrealDbClient::new().await?;
|
||||||
let openai_client = async_openai::Client::new();
|
let openai_client = async_openai::Client::new();
|
||||||
|
|
||||||
let create_operation = storage::db::store_item(&db_client, self.clone()).await?;
|
// Store TextContent
|
||||||
|
let create_operation = store_item(&db_client, self.clone()).await?;
|
||||||
info!("{:?}", create_operation);
|
info!("{:?}", create_operation);
|
||||||
// self.store_text_content(&db_client).await?;
|
|
||||||
|
|
||||||
|
// Get related nodes
|
||||||
let closest_text_content: Vec<TextChunk> = vector_comparison(
|
let closest_text_content: Vec<TextChunk> = vector_comparison(
|
||||||
3,
|
3,
|
||||||
self.text.clone(),
|
self.text.clone(),
|
||||||
@@ -148,42 +126,20 @@ impl TextContent {
|
|||||||
db_client: &Surreal<Client>,
|
db_client: &Surreal<Client>,
|
||||||
) -> Result<(), ProcessingError> {
|
) -> Result<(), ProcessingError> {
|
||||||
for entity in &entities {
|
for entity in &entities {
|
||||||
info!(
|
debug!(
|
||||||
"{:?}, {:?}, {:?}",
|
"{:?}, {:?}, {:?}",
|
||||||
&entity.id, &entity.name, &entity.description
|
&entity.id, &entity.name, &entity.description
|
||||||
);
|
);
|
||||||
|
|
||||||
let _created: Option<KnowledgeEntity> = db_client
|
store_item(db_client, entity.clone()).await?;
|
||||||
.create(("knowledge_entity", &entity.id.to_string()))
|
|
||||||
.content(entity.clone())
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
debug!("{:?}", _created);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for relationship in &relationships {
|
for relationship in &relationships {
|
||||||
// info!("{:?}", relationship);
|
debug!("{:?}", relationship);
|
||||||
|
|
||||||
let _created: Option<KnowledgeRelationship> = db_client
|
store_item(db_client, relationship.clone()).await?;
|
||||||
.insert(("knowledge_relationship", &relationship.id.to_string()))
|
|
||||||
.content(relationship.clone())
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
debug!("{:?}", _created);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// for relationship in &relationships {
|
|
||||||
// let in_entity: Option<KnowledgeEntity> = db_client.select(("knowledge_entity",relationship.in_.to_string())).await?;
|
|
||||||
// let out_entity: Option<KnowledgeEntity> = db_client.select(("knowledge_entity", relationship.out.to_string())).await?;
|
|
||||||
|
|
||||||
// if let (Some(in_), Some(out)) = (in_entity, out_entity) {
|
|
||||||
// info!("{} - {} is {} to {} - {}", in_.id, in_.name, relationship.relationship_type, out.id, out.name);
|
|
||||||
// }
|
|
||||||
// else {
|
|
||||||
// info!("No in or out entities found");
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
"Inserted to database: {:?} entities, {:?} relationships",
|
"Inserted to database: {:?} entities, {:?} relationships",
|
||||||
entities.len(),
|
entities.len(),
|
||||||
@@ -194,7 +150,6 @@ impl TextContent {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Splits text and stores it in a vector database.
|
/// Splits text and stores it in a vector database.
|
||||||
#[allow(dead_code)]
|
|
||||||
async fn store_in_vector_db(
|
async fn store_in_vector_db(
|
||||||
&self,
|
&self,
|
||||||
db_client: &Surreal<Client>,
|
db_client: &Surreal<Client>,
|
||||||
@@ -210,8 +165,6 @@ impl TextContent {
|
|||||||
let embedding = generate_embedding(&openai_client, chunk.to_string()).await?;
|
let embedding = generate_embedding(&openai_client, chunk.to_string()).await?;
|
||||||
let text_chunk = TextChunk::new(self.id.to_string(), chunk.to_string(), embedding);
|
let text_chunk = TextChunk::new(self.id.to_string(), chunk.to_string(), embedding);
|
||||||
|
|
||||||
info!("{:?}", text_chunk);
|
|
||||||
|
|
||||||
store_item(db_client, text_chunk).await?;
|
store_item(db_client, text_chunk).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
55
src/storage/types/knowledge_entity.rs
Normal file
55
src/storage/types/knowledge_entity.rs
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
use crate::stored_object;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||||
|
pub enum KnowledgeEntityType {
|
||||||
|
Idea,
|
||||||
|
Project,
|
||||||
|
Document,
|
||||||
|
Page,
|
||||||
|
TextSnippet,
|
||||||
|
// Add more types as needed
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<String> for KnowledgeEntityType {
|
||||||
|
fn from(s: String) -> Self {
|
||||||
|
match s.to_lowercase().as_str() {
|
||||||
|
"idea" => KnowledgeEntityType::Idea,
|
||||||
|
"project" => KnowledgeEntityType::Project,
|
||||||
|
"document" => KnowledgeEntityType::Document,
|
||||||
|
"page" => KnowledgeEntityType::Page,
|
||||||
|
"textsnippet" => KnowledgeEntityType::TextSnippet,
|
||||||
|
_ => KnowledgeEntityType::Document, // Default case
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stored_object!(KnowledgeEntity, "knowledge_entity", {
|
||||||
|
source_id: String,
|
||||||
|
name: String,
|
||||||
|
description: String,
|
||||||
|
entity_type: KnowledgeEntityType,
|
||||||
|
metadata: Option<serde_json::Value>,
|
||||||
|
embedding: Vec<f32>
|
||||||
|
});
|
||||||
|
|
||||||
|
impl KnowledgeEntity {
|
||||||
|
pub fn new(
|
||||||
|
source_id: String,
|
||||||
|
name: String,
|
||||||
|
description: String,
|
||||||
|
entity_type: KnowledgeEntityType,
|
||||||
|
metadata: Option<serde_json::Value>,
|
||||||
|
embedding: Vec<f32>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
id: Uuid::new_v4().to_string(),
|
||||||
|
source_id,
|
||||||
|
name,
|
||||||
|
description,
|
||||||
|
entity_type,
|
||||||
|
metadata,
|
||||||
|
embedding,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
26
src/storage/types/knowledge_relationship.rs
Normal file
26
src/storage/types/knowledge_relationship.rs
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
use crate::stored_object;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
stored_object!(KnowledgeRelationship, "knowledge_relationship", {
|
||||||
|
in_: String,
|
||||||
|
out: String,
|
||||||
|
relationship_type: String,
|
||||||
|
metadata: Option<serde_json::Value>
|
||||||
|
});
|
||||||
|
|
||||||
|
impl KnowledgeRelationship {
|
||||||
|
pub fn new(
|
||||||
|
in_: String,
|
||||||
|
out: String,
|
||||||
|
relationship_type: String,
|
||||||
|
metadata: Option<serde_json::Value>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
id: Uuid::new_v4().to_string(),
|
||||||
|
in_,
|
||||||
|
out,
|
||||||
|
relationship_type,
|
||||||
|
metadata,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
use axum::async_trait;
|
use axum::async_trait;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
pub mod knowledge_entity;
|
||||||
|
pub mod knowledge_relationship;
|
||||||
pub mod text_chunk;
|
pub mod text_chunk;
|
||||||
pub mod text_content;
|
pub mod text_content;
|
||||||
|
|
||||||
@@ -11,7 +13,7 @@ pub trait StoredObject: Serialize + for<'de> Deserialize<'de> {
|
|||||||
|
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
macro_rules! stored_object {
|
macro_rules! stored_object {
|
||||||
($name:ident, $table:expr, {$($field:ident: $ty:ty),*}) => {
|
($name:ident, $table:expr, {$($(#[$attr:meta])* $field:ident: $ty:ty),*}) => {
|
||||||
use axum::async_trait;
|
use axum::async_trait;
|
||||||
use serde::{Deserialize, Deserializer, Serialize};
|
use serde::{Deserialize, Deserializer, Serialize};
|
||||||
use surrealdb::sql::Thing;
|
use surrealdb::sql::Thing;
|
||||||
|
|||||||
@@ -11,15 +11,18 @@ stored_object!(TextContent, "text_content", {
|
|||||||
});
|
});
|
||||||
|
|
||||||
impl TextContent {
|
impl TextContent {
|
||||||
pub fn new(text: String, instructions: String, category: String) -> Self {
|
pub fn new(
|
||||||
|
text: String,
|
||||||
|
instructions: String,
|
||||||
|
category: String,
|
||||||
|
file_info: Option<FileInfo>,
|
||||||
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
id: Uuid::new_v4().to_string(),
|
id: Uuid::new_v4().to_string(),
|
||||||
text,
|
text,
|
||||||
file_info: None,
|
file_info,
|
||||||
instructions,
|
instructions,
|
||||||
category,
|
category,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Other methods...
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
error::ProcessingError,
|
error::ProcessingError,
|
||||||
models::graph_entities::{
|
models::graph_entities::GraphMapper,
|
||||||
GraphMapper, KnowledgeEntity, KnowledgeEntityType, KnowledgeRelationship,
|
storage::types::{
|
||||||
|
knowledge_entity::{KnowledgeEntity, KnowledgeEntityType},
|
||||||
|
knowledge_relationship::KnowledgeRelationship,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
use async_openai::types::{
|
use async_openai::types::{
|
||||||
@@ -119,7 +121,7 @@ impl LLMGraphAnalysisResult {
|
|||||||
entity_type: KnowledgeEntityType::from(llm_entity.entity_type.clone()),
|
entity_type: KnowledgeEntityType::from(llm_entity.entity_type.clone()),
|
||||||
source_id: source_id.to_string(),
|
source_id: source_id.to_string(),
|
||||||
metadata: None,
|
metadata: None,
|
||||||
embedding: Some(embedding),
|
embedding,
|
||||||
};
|
};
|
||||||
|
|
||||||
entities.push(knowledge_entity);
|
entities.push(knowledge_entity);
|
||||||
|
|||||||
Reference in New Issue
Block a user