From 6af463ed44aea0b7a4bc8c86dfbdd638206041cb Mon Sep 17 00:00:00 2001 From: Per Stark Date: Wed, 16 Oct 2024 09:39:15 +0200 Subject: [PATCH] database plan --- database.md | 59 ++++++++++++++++++++++++++++++++++++++ src/models/text_content.rs | 2 ++ 2 files changed, 61 insertions(+) create mode 100644 database.md diff --git a/database.md b/database.md new file mode 100644 index 0000000..5990988 --- /dev/null +++ b/database.md @@ -0,0 +1,59 @@ +# SurrealDB only + +Right now we have the FileInfo stored in "files" + +- Change the uuid to Uuid type, and have the database layer still use String. This means parsing and re-serializing, but that's fine. + +``` +pub struct FileInfo { + pub uuid: String, + pub sha256: String, + pub path: String, + pub mime_type: String, +} +``` + +We create TextContent objects; should we store them? + +- We store the "snippets" along with the vectors, but it would make sense to store the whole TextContent, at least for files that are not enormous? + +``` +pub struct TextContent { + pub id: Uuid, + pub text: String, + pub file_info: Option<FileInfo>, + pub instructions: String, + pub category: String, +} +``` + +We create KnowledgeSource, which we will store + +- Add a uuid so we can link the TextContent and files to the knowledge sources? + +``` +pub struct KnowledgeSource { + pub id: String, + pub title: String, + pub description: String, + pub relationships: Vec, +} +``` + +We will create embeddings and vector representations of TextContent, possibly split up, and store them in a vector DB + +``` +pub struct VectorEmbeddingOfTextContent { + pub id: Uuid, + pub vectors: Vec(or something), + pub text_content: String, + pub category: String, +} +``` + +______________________________________________________________________ + +## Goals + +- Smooth operations when updating, removing and adding data +- Smooth queries where one can search, get a vector snippet, which links to a graph node and its edges, and also the full-text document. 
diff --git a/src/models/text_content.rs b/src/models/text_content.rs index f145107..4deaee1 100644 --- a/src/models/text_content.rs +++ b/src/models/text_content.rs @@ -84,6 +84,7 @@ impl TextContent { } /// Stores analysis results in a graph database. + #[allow(dead_code)] async fn store_in_graph_db(&self, _analysis: &AnalysisResult) -> Result<(), ProcessingError> { // TODO: Implement storage logic for your specific graph database. // Example: @@ -95,6 +96,7 @@ impl TextContent { } /// Splits text and stores it in a vector database. + #[allow(dead_code)] async fn store_in_vector_db(&self) -> Result<(), ProcessingError> { // TODO: Implement text splitting and vector storage logic. // Example: