mirror of
https://github.com/perstarkse/minne.git
synced 2026-04-23 17:28:34 +02:00
database plan
This commit is contained in:
59
database.md
Normal file
59
database.md
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# SurrealDB only
|
||||||
|
|
||||||
|
Right now we have the FileInfo stored in "files"
|
||||||
|
|
||||||
|
- Change the uuid to the Uuid type, and have the database layer still use String. This means parsing and serializing at the boundary, but that's fine.
|
||||||
|
|
||||||
|
```
|
||||||
|
pub struct FileInfo {
|
||||||
|
pub uuid: String,
|
||||||
|
pub sha256: String,
|
||||||
|
pub path: String,
|
||||||
|
pub mime_type: String,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
We create TextContent objects, which we should store?
|
||||||
|
|
||||||
|
- We store the "snippets" along with the vectors, but it would make sense to store the whole TextContent, at least for files that are not enormous?
|
||||||
|
|
||||||
|
```
|
||||||
|
pub struct TextContent {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub text: String,
|
||||||
|
pub file_info: Option<FileInfo>,
|
||||||
|
pub instructions: String,
|
||||||
|
pub category: String,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
We create KnowledgeSource, which we will store
|
||||||
|
|
||||||
|
- Add a uuid so we can link the TextContent and files to the knowledge sources?
|
||||||
|
|
||||||
|
```
|
||||||
|
pub struct KnowledgeSource {
|
||||||
|
pub id: String,
|
||||||
|
pub title: String,
|
||||||
|
pub description: String,
|
||||||
|
pub relationships: Vec<Relationship>,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
We will create embeddings and vector representations of TextContent, possibly split up, and store them in a vector DB
|
||||||
|
|
||||||
|
```
|
||||||
|
pub struct VectorEmbeddingOfTextContent {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub vectors: Vec<u8>(or something),
|
||||||
|
pub text_content: String,
|
||||||
|
pub category: String,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
______________________________________________________________________
|
||||||
|
|
||||||
|
## Goals
|
||||||
|
|
||||||
|
- Smooth operations when updating, removing and adding data
|
||||||
|
- Smooth queries where one can search, get a vector snippet, which links to a graph node and its edges, and also the full-text document.
|
||||||
@@ -84,6 +84,7 @@ impl TextContent {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Stores analysis results in a graph database.
|
/// Stores analysis results in a graph database.
|
||||||
|
#[allow(dead_code)]
|
||||||
async fn store_in_graph_db(&self, _analysis: &AnalysisResult) -> Result<(), ProcessingError> {
|
async fn store_in_graph_db(&self, _analysis: &AnalysisResult) -> Result<(), ProcessingError> {
|
||||||
// TODO: Implement storage logic for your specific graph database.
|
// TODO: Implement storage logic for your specific graph database.
|
||||||
// Example:
|
// Example:
|
||||||
@@ -95,6 +96,7 @@ impl TextContent {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Splits text and stores it in a vector database.
|
/// Splits text and stores it in a vector database.
|
||||||
|
#[allow(dead_code)]
|
||||||
async fn store_in_vector_db(&self) -> Result<(), ProcessingError> {
|
async fn store_in_vector_db(&self) -> Result<(), ProcessingError> {
|
||||||
// TODO: Implement text splitting and vector storage logic.
|
// TODO: Implement text splitting and vector storage logic.
|
||||||
// Example:
|
// Example:
|
||||||
|
|||||||
Reference in New Issue
Block a user