mirror of
https://github.com/perstarkse/minne.git
synced 2026-03-19 07:54:29 +01:00
refactoring: completed storage, now using new fn to construct
This commit is contained in:
@@ -1,66 +1,6 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Deserializer;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use surrealdb::sql::Thing;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Represents a generic knowledge entity in the graph.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct KnowledgeEntity {
|
||||
#[serde(deserialize_with = "thing_to_string")]
|
||||
pub id: String,
|
||||
pub name: String,
|
||||
pub description: String,
|
||||
pub entity_type: KnowledgeEntityType,
|
||||
pub source_id: String,
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
pub embedding: Option<Vec<f32>>,
|
||||
}
|
||||
|
||||
pub fn thing_to_string<'de, D>(deserializer: D) -> Result<String, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let thing = Thing::deserialize(deserializer)?;
|
||||
Ok(thing.id.to_raw())
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub enum KnowledgeEntityType {
|
||||
Idea,
|
||||
Project,
|
||||
Document,
|
||||
Page,
|
||||
TextSnippet,
|
||||
// Add more types as needed
|
||||
}
|
||||
|
||||
impl From<String> for KnowledgeEntityType {
|
||||
fn from(s: String) -> Self {
|
||||
match s.to_lowercase().as_str() {
|
||||
"idea" => KnowledgeEntityType::Idea,
|
||||
"project" => KnowledgeEntityType::Project,
|
||||
"document" => KnowledgeEntityType::Document,
|
||||
"page" => KnowledgeEntityType::Page,
|
||||
"textsnippet" => KnowledgeEntityType::TextSnippet,
|
||||
_ => KnowledgeEntityType::Document, // Default case
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a relationship between two knowledge entities.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct KnowledgeRelationship {
|
||||
#[serde(deserialize_with = "thing_to_string")]
|
||||
pub id: String,
|
||||
#[serde(rename = "in")]
|
||||
pub in_: String, // Target KnowledgeEntity ID
|
||||
pub out: String, // Source KnowledgeEntity ID
|
||||
pub relationship_type: String, // e.g., RelatedTo, RelevantTo
|
||||
pub metadata: Option<serde_json::Value>, // Additional metadata
|
||||
}
|
||||
|
||||
/// Intermediate struct to hold mapping between LLM keys and generated IDs.
|
||||
#[derive(Clone)]
|
||||
pub struct GraphMapper {
|
||||
|
||||
@@ -40,43 +40,35 @@ impl IngressObject {
|
||||
category,
|
||||
} => {
|
||||
let text = Self::fetch_text_from_url(url).await?;
|
||||
let id = Uuid::new_v4();
|
||||
Ok(TextContent {
|
||||
id: id.to_string(),
|
||||
Ok(TextContent::new(
|
||||
text,
|
||||
instructions: instructions.clone(),
|
||||
category: category.clone(),
|
||||
file_info: None,
|
||||
})
|
||||
instructions.into(),
|
||||
category.into(),
|
||||
None,
|
||||
))
|
||||
}
|
||||
IngressObject::Text {
|
||||
text,
|
||||
instructions,
|
||||
category,
|
||||
} => {
|
||||
let id = Uuid::new_v4();
|
||||
Ok(TextContent {
|
||||
id: id.to_string(),
|
||||
text: text.clone(),
|
||||
instructions: instructions.clone(),
|
||||
category: category.clone(),
|
||||
file_info: None,
|
||||
})
|
||||
}
|
||||
} => Ok(TextContent::new(
|
||||
text.into(),
|
||||
instructions.into(),
|
||||
category.into(),
|
||||
None,
|
||||
)),
|
||||
IngressObject::File {
|
||||
file_info,
|
||||
instructions,
|
||||
category,
|
||||
} => {
|
||||
let id = Uuid::new_v4();
|
||||
let text = Self::extract_text_from_file(file_info).await?;
|
||||
Ok(TextContent {
|
||||
id: id.to_string(),
|
||||
Ok(TextContent::new(
|
||||
text,
|
||||
instructions: instructions.clone(),
|
||||
category: category.clone(),
|
||||
file_info: Some(file_info.clone()),
|
||||
})
|
||||
instructions.into(),
|
||||
category.into(),
|
||||
Some(file_info.to_owned()),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::storage;
|
||||
use crate::storage::db::store_item;
|
||||
use crate::storage::types::knowledge_entity::KnowledgeEntity;
|
||||
use crate::storage::types::knowledge_relationship::KnowledgeRelationship;
|
||||
use crate::storage::types::text_chunk::TextChunk;
|
||||
use crate::storage::types::text_content::TextContent;
|
||||
use crate::{
|
||||
@@ -10,29 +11,6 @@ use crate::{
|
||||
use surrealdb::{engine::remote::ws::Client, Surreal};
|
||||
use text_splitter::TextSplitter;
|
||||
use tracing::{debug, info};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::graph_entities::{KnowledgeEntity, KnowledgeRelationship};
|
||||
|
||||
// #[derive(Serialize, Deserialize, Debug)]
|
||||
// struct TextChunk {
|
||||
// #[serde(deserialize_with = "thing_to_string")]
|
||||
// id: String,
|
||||
// source_id: String,
|
||||
// chunk: String,
|
||||
// embedding: Vec<f32>,
|
||||
// }
|
||||
|
||||
/// Represents a single piece of text content extracted from various sources.
|
||||
// #[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
// pub struct TextContent {
|
||||
// #[serde(deserialize_with = "thing_to_string")]
|
||||
// pub id: String,
|
||||
// pub text: String,
|
||||
// pub file_info: Option<FileInfo>,
|
||||
// pub instructions: String,
|
||||
// pub category: String,
|
||||
// }
|
||||
|
||||
async fn vector_comparison<T>(
|
||||
take: u8,
|
||||
@@ -70,14 +48,14 @@ async fn get_related_nodes(
|
||||
impl TextContent {
|
||||
/// Processes the `TextContent` by sending it to an LLM, storing in a graph DB, and vector DB.
|
||||
pub async fn process(&self) -> Result<(), ProcessingError> {
|
||||
// Store TextContent
|
||||
let db_client = SurrealDbClient::new().await?;
|
||||
let openai_client = async_openai::Client::new();
|
||||
|
||||
let create_operation = storage::db::store_item(&db_client, self.clone()).await?;
|
||||
// Store TextContent
|
||||
let create_operation = store_item(&db_client, self.clone()).await?;
|
||||
info!("{:?}", create_operation);
|
||||
// self.store_text_content(&db_client).await?;
|
||||
|
||||
// Get related nodes
|
||||
let closest_text_content: Vec<TextChunk> = vector_comparison(
|
||||
3,
|
||||
self.text.clone(),
|
||||
@@ -148,42 +126,20 @@ impl TextContent {
|
||||
db_client: &Surreal<Client>,
|
||||
) -> Result<(), ProcessingError> {
|
||||
for entity in &entities {
|
||||
info!(
|
||||
debug!(
|
||||
"{:?}, {:?}, {:?}",
|
||||
&entity.id, &entity.name, &entity.description
|
||||
);
|
||||
|
||||
let _created: Option<KnowledgeEntity> = db_client
|
||||
.create(("knowledge_entity", &entity.id.to_string()))
|
||||
.content(entity.clone())
|
||||
.await?;
|
||||
|
||||
debug!("{:?}", _created);
|
||||
store_item(db_client, entity.clone()).await?;
|
||||
}
|
||||
|
||||
for relationship in &relationships {
|
||||
// info!("{:?}", relationship);
|
||||
debug!("{:?}", relationship);
|
||||
|
||||
let _created: Option<KnowledgeRelationship> = db_client
|
||||
.insert(("knowledge_relationship", &relationship.id.to_string()))
|
||||
.content(relationship.clone())
|
||||
.await?;
|
||||
|
||||
debug!("{:?}", _created);
|
||||
store_item(db_client, relationship.clone()).await?;
|
||||
}
|
||||
|
||||
// for relationship in &relationships {
|
||||
// let in_entity: Option<KnowledgeEntity> = db_client.select(("knowledge_entity",relationship.in_.to_string())).await?;
|
||||
// let out_entity: Option<KnowledgeEntity> = db_client.select(("knowledge_entity", relationship.out.to_string())).await?;
|
||||
|
||||
// if let (Some(in_), Some(out)) = (in_entity, out_entity) {
|
||||
// info!("{} - {} is {} to {} - {}", in_.id, in_.name, relationship.relationship_type, out.id, out.name);
|
||||
// }
|
||||
// else {
|
||||
// info!("No in or out entities found");
|
||||
// }
|
||||
// }
|
||||
|
||||
info!(
|
||||
"Inserted to database: {:?} entities, {:?} relationships",
|
||||
entities.len(),
|
||||
@@ -194,7 +150,6 @@ impl TextContent {
|
||||
}
|
||||
|
||||
/// Splits text and stores it in a vector database.
|
||||
#[allow(dead_code)]
|
||||
async fn store_in_vector_db(
|
||||
&self,
|
||||
db_client: &Surreal<Client>,
|
||||
@@ -210,8 +165,6 @@ impl TextContent {
|
||||
let embedding = generate_embedding(&openai_client, chunk.to_string()).await?;
|
||||
let text_chunk = TextChunk::new(self.id.to_string(), chunk.to_string(), embedding);
|
||||
|
||||
info!("{:?}", text_chunk);
|
||||
|
||||
store_item(db_client, text_chunk).await?;
|
||||
}
|
||||
|
||||
|
||||
55
src/storage/types/knowledge_entity.rs
Normal file
55
src/storage/types/knowledge_entity.rs
Normal file
@@ -0,0 +1,55 @@
|
||||
use crate::stored_object;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub enum KnowledgeEntityType {
|
||||
Idea,
|
||||
Project,
|
||||
Document,
|
||||
Page,
|
||||
TextSnippet,
|
||||
// Add more types as needed
|
||||
}
|
||||
|
||||
impl From<String> for KnowledgeEntityType {
|
||||
fn from(s: String) -> Self {
|
||||
match s.to_lowercase().as_str() {
|
||||
"idea" => KnowledgeEntityType::Idea,
|
||||
"project" => KnowledgeEntityType::Project,
|
||||
"document" => KnowledgeEntityType::Document,
|
||||
"page" => KnowledgeEntityType::Page,
|
||||
"textsnippet" => KnowledgeEntityType::TextSnippet,
|
||||
_ => KnowledgeEntityType::Document, // Default case
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stored_object!(KnowledgeEntity, "knowledge_entity", {
|
||||
source_id: String,
|
||||
name: String,
|
||||
description: String,
|
||||
entity_type: KnowledgeEntityType,
|
||||
metadata: Option<serde_json::Value>,
|
||||
embedding: Vec<f32>
|
||||
});
|
||||
|
||||
impl KnowledgeEntity {
|
||||
pub fn new(
|
||||
source_id: String,
|
||||
name: String,
|
||||
description: String,
|
||||
entity_type: KnowledgeEntityType,
|
||||
metadata: Option<serde_json::Value>,
|
||||
embedding: Vec<f32>,
|
||||
) -> Self {
|
||||
Self {
|
||||
id: Uuid::new_v4().to_string(),
|
||||
source_id,
|
||||
name,
|
||||
description,
|
||||
entity_type,
|
||||
metadata,
|
||||
embedding,
|
||||
}
|
||||
}
|
||||
}
|
||||
26
src/storage/types/knowledge_relationship.rs
Normal file
26
src/storage/types/knowledge_relationship.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
use crate::stored_object;
|
||||
use uuid::Uuid;
|
||||
|
||||
stored_object!(KnowledgeRelationship, "knowledge_relationship", {
|
||||
in_: String,
|
||||
out: String,
|
||||
relationship_type: String,
|
||||
metadata: Option<serde_json::Value>
|
||||
});
|
||||
|
||||
impl KnowledgeRelationship {
|
||||
pub fn new(
|
||||
in_: String,
|
||||
out: String,
|
||||
relationship_type: String,
|
||||
metadata: Option<serde_json::Value>,
|
||||
) -> Self {
|
||||
Self {
|
||||
id: Uuid::new_v4().to_string(),
|
||||
in_,
|
||||
out,
|
||||
relationship_type,
|
||||
metadata,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,7 @@
|
||||
use axum::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
pub mod knowledge_entity;
|
||||
pub mod knowledge_relationship;
|
||||
pub mod text_chunk;
|
||||
pub mod text_content;
|
||||
|
||||
@@ -11,7 +13,7 @@ pub trait StoredObject: Serialize + for<'de> Deserialize<'de> {
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! stored_object {
|
||||
($name:ident, $table:expr, {$($field:ident: $ty:ty),*}) => {
|
||||
($name:ident, $table:expr, {$($(#[$attr:meta])* $field:ident: $ty:ty),*}) => {
|
||||
use axum::async_trait;
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use surrealdb::sql::Thing;
|
||||
|
||||
@@ -11,15 +11,18 @@ stored_object!(TextContent, "text_content", {
|
||||
});
|
||||
|
||||
impl TextContent {
|
||||
pub fn new(text: String, instructions: String, category: String) -> Self {
|
||||
pub fn new(
|
||||
text: String,
|
||||
instructions: String,
|
||||
category: String,
|
||||
file_info: Option<FileInfo>,
|
||||
) -> Self {
|
||||
Self {
|
||||
id: Uuid::new_v4().to_string(),
|
||||
text,
|
||||
file_info: None,
|
||||
file_info,
|
||||
instructions,
|
||||
category,
|
||||
}
|
||||
}
|
||||
|
||||
// Other methods...
|
||||
}
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
use crate::{
|
||||
error::ProcessingError,
|
||||
models::graph_entities::{
|
||||
GraphMapper, KnowledgeEntity, KnowledgeEntityType, KnowledgeRelationship,
|
||||
models::graph_entities::GraphMapper,
|
||||
storage::types::{
|
||||
knowledge_entity::{KnowledgeEntity, KnowledgeEntityType},
|
||||
knowledge_relationship::KnowledgeRelationship,
|
||||
},
|
||||
};
|
||||
use async_openai::types::{
|
||||
@@ -119,7 +121,7 @@ impl LLMGraphAnalysisResult {
|
||||
entity_type: KnowledgeEntityType::from(llm_entity.entity_type.clone()),
|
||||
source_id: source_id.to_string(),
|
||||
metadata: None,
|
||||
embedding: Some(embedding),
|
||||
embedding,
|
||||
};
|
||||
|
||||
entities.push(knowledge_entity);
|
||||
|
||||
Reference in New Issue
Block a user