mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-30 03:10:45 +02:00
chore: harden common storage bootstrap and slim embedded db assets
Unify embedding config, build providers from system settings, and fail startup when index builds error or time out. Move Surreal assets under common/db so embeds exclude crate source, and read storage via streams.
This commit is contained in:
@@ -0,0 +1,7 @@
|
||||
# Defines the schema for the 'analytics' table.
|
||||
|
||||
-- IF NOT EXISTS: the definition is only created when absent, so re-running this
-- script leaves an existing `analytics` definition untouched.
-- SCHEMALESS: fields declared below are type-checked; other fields are not restricted.
DEFINE TABLE IF NOT EXISTS analytics SCHEMALESS;
|
||||
|
||||
# Custom fields from the Analytics struct
|
||||
-- Both counters are declared with the generic `number` type.
DEFINE FIELD IF NOT EXISTS page_loads ON analytics TYPE number;
|
||||
DEFINE FIELD IF NOT EXISTS visitors ON analytics TYPE number;
|
||||
@@ -0,0 +1,6 @@
|
||||
# Defines authentication scope and access rules.
|
||||
# This mirrors the logic previously in SurrealDbClient::setup_auth
|
||||
|
||||
-- Record-based access method `account`, defined at database level.
--   SIGNUP: creates a `user` record from $email, $password and $user_id; the password is
--           stored as the output of crypto::argon2::generate and `anonymous` is set to false.
--   SIGNIN: selects the `user` whose email equals $email and whose stored hash matches
--           $password according to crypto::argon2::compare.
-- NOTE(review): SIGNUP does not set created_at, updated_at, admin or timezone, which the
-- `user` schema declares as non-optional typed fields — confirm whether signup through
-- this access method is expected to succeed or whether users are created another way.
DEFINE ACCESS IF NOT EXISTS account ON DATABASE TYPE RECORD
|
||||
SIGNUP ( CREATE user SET email = $email, password = crypto::argon2::generate($password), anonymous = false, user_id = $user_id) # Ensure user_id is provided if needed
|
||||
SIGNIN ( SELECT * FROM user WHERE email = $email AND crypto::argon2::compare(password, $password) );
|
||||
@@ -0,0 +1,16 @@
|
||||
# Defines the schema for the 'conversation' table.
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS); SCHEMALESS, so only the fields declared
-- below are type-checked.
DEFINE TABLE IF NOT EXISTS conversation SCHEMALESS;
|
||||
|
||||
# Standard fields
|
||||
DEFINE FIELD IF NOT EXISTS created_at ON conversation TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON conversation TYPE datetime;
|
||||
|
||||
# Custom fields from the Conversation struct
|
||||
-- user_id is stored as a plain string, not as a record link.
DEFINE FIELD IF NOT EXISTS user_id ON conversation TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS title ON conversation TYPE string;
|
||||
|
||||
# Add indexes based on query patterns (get_complete_conversation ownership check, get_user_conversations)
|
||||
-- Single-column index on user_id; the composite index at the end also leads with user_id.
DEFINE INDEX IF NOT EXISTS conversation_user_id_idx ON conversation FIELDS user_id;
|
||||
DEFINE INDEX IF NOT EXISTS conversation_created_at_idx ON conversation FIELDS created_at; # For get_user_conversations ORDER BY
|
||||
-- Composite index over (user_id, updated_at).
DEFINE INDEX IF NOT EXISTS conversation_user_updated_at_idx ON conversation FIELDS user_id, updated_at; # For sidebar conversation projection ORDER BY
|
||||
@@ -0,0 +1,18 @@
|
||||
# Defines the schema for the 'file' table (used by FileInfo).
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS); SCHEMALESS, so only the fields declared
-- below are type-checked.
DEFINE TABLE IF NOT EXISTS file SCHEMALESS;
|
||||
|
||||
# Standard fields
|
||||
DEFINE FIELD IF NOT EXISTS created_at ON file TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON file TYPE datetime;
|
||||
|
||||
# Custom fields from the FileInfo struct
|
||||
-- All custom fields are non-optional strings.
DEFINE FIELD IF NOT EXISTS sha256 ON file TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS path ON file TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS file_name ON file TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS mime_type ON file TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS user_id ON file TYPE string;
|
||||
|
||||
# Indexes based on usage (get_by_sha scoped by user_id, user lookups)
|
||||
-- UNIQUE over the pair (user_id, sha256): the same hash may appear for different users,
-- but only once per user.
DEFINE INDEX IF NOT EXISTS file_user_sha256_idx ON file FIELDS user_id, sha256 UNIQUE;
|
||||
-- Non-unique single-column index on user_id.
DEFINE INDEX IF NOT EXISTS file_user_id_idx ON file FIELDS user_id;
|
||||
@@ -0,0 +1,16 @@
|
||||
# Defines the schema for the 'ingestion_task' table (used by IngestionTask).
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS); SCHEMALESS, so only the fields declared
-- below are type-checked.
DEFINE TABLE IF NOT EXISTS ingestion_task SCHEMALESS;
|
||||
|
||||
# Standard fields
|
||||
DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE datetime;
|
||||
|
||||
-- content and status are both typed as `object`; their inner shape is not declared here.
DEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object;
|
||||
DEFINE FIELD IF NOT EXISTS status ON ingestion_task TYPE object;
|
||||
DEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string;
|
||||
|
||||
# Indexes explicitly defined in build_indexes and useful for get_unfinished_tasks
|
||||
-- The status index covers the whole `status` object value, not one of its sub-fields.
DEFINE INDEX IF NOT EXISTS idx_ingestion_task_status ON ingestion_task FIELDS status;
|
||||
DEFINE INDEX IF NOT EXISTS idx_ingestion_task_user ON ingestion_task FIELDS user_id;
|
||||
DEFINE INDEX IF NOT EXISTS idx_ingestion_task_created ON ingestion_task FIELDS created_at;
|
||||
@@ -0,0 +1,27 @@
|
||||
# Defines the schema for the 'knowledge_entity' table.
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS). SCHEMAFULL: unlike the SCHEMALESS tables in
-- this schema, records are restricted to the fields declared below.
DEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMAFULL;
|
||||
|
||||
# Standard fields
|
||||
DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity TYPE datetime;
|
||||
|
||||
# Custom fields from the KnowledgeEntity struct
|
||||
DEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS name ON knowledge_entity TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS description ON knowledge_entity TYPE string;
|
||||
# KnowledgeEntityType is an enum, store as string
|
||||
DEFINE FIELD IF NOT EXISTS entity_type ON knowledge_entity TYPE string;
|
||||
# metadata is Option<serde_json::Value>, store as object
|
||||
-- option<object>: the field may be absent; when present it must be an object.
DEFINE FIELD IF NOT EXISTS metadata ON knowledge_entity TYPE option<object>;
|
||||
|
||||
DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity TYPE string;
|
||||
|
||||
-- Indexes based on build_indexes and query patterns
|
||||
-- HNSW index now defined on knowledge_entity_embedding table for better memory usage
|
||||
-- DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;
|
||||
-- No `embedding` field is declared on this table; the statement above is inactive.
DEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;
|
||||
DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;
|
||||
-- Composite index over (user_id, source_id), in addition to the two single-column ones.
DEFINE INDEX IF NOT EXISTS knowledge_entity_user_source_idx ON knowledge_entity FIELDS user_id, source_id;
|
||||
DEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;
|
||||
DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at;
|
||||
@@ -0,0 +1,20 @@
|
||||
-- Defines the schema for the 'knowledge_entity_embedding' table.
|
||||
-- Separate table to optimize HNSW index creation memory usage
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS). SCHEMAFULL: records are restricted to the
-- fields declared below.
DEFINE TABLE IF NOT EXISTS knowledge_entity_embedding SCHEMAFULL;
|
||||
|
||||
-- Standard fields
|
||||
DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity_embedding TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity_embedding TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity_embedding TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity_embedding TYPE string;
|
||||
|
||||
-- Custom fields
|
||||
-- entity_id is a record link constrained to the knowledge_entity table.
DEFINE FIELD IF NOT EXISTS entity_id ON knowledge_entity_embedding TYPE record<knowledge_entity>;
|
||||
-- The vector is an array of floats; no fixed length is declared at the field level.
DEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity_embedding TYPE array<float>;
|
||||
|
||||
-- Indexes
|
||||
-- DEFINE INDEX IF NOT EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;
|
||||
-- NOTE(review): the HNSW statement above is commented out, so this script creates no
-- vector index; presumably it is built elsewhere with a configurable dimension — confirm.
-- UNIQUE on entity_id: at most one embedding row per knowledge_entity record.
DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id UNIQUE;
|
||||
DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_user_id_idx ON knowledge_entity_embedding FIELDS user_id;
|
||||
DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_source_id_idx ON knowledge_entity_embedding FIELDS source_id;
|
||||
@@ -0,0 +1,19 @@
|
||||
# Defines the schema for the 'message' table.
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS); SCHEMALESS, so only the fields declared
-- below are type-checked.
DEFINE TABLE IF NOT EXISTS message SCHEMALESS;
|
||||
|
||||
# Standard fields
|
||||
DEFINE FIELD IF NOT EXISTS created_at ON message TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON message TYPE datetime;
|
||||
|
||||
# Custom fields from the Message struct
|
||||
-- conversation_id is stored as a plain string, not as a record link.
DEFINE FIELD IF NOT EXISTS conversation_id ON message TYPE string;
|
||||
# MessageRole is an enum, store as string
|
||||
DEFINE FIELD IF NOT EXISTS role ON message TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content ON message TYPE string;
|
||||
# references is Option<Vec<String>>, store as array<string>
|
||||
-- option<array<string>>: the field may be absent; when present every element is a string.
DEFINE FIELD IF NOT EXISTS references ON message TYPE option<array<string>>;
|
||||
|
||||
# Indexes based on query patterns (get_complete_conversation)
|
||||
DEFINE INDEX IF NOT EXISTS message_conversation_id_idx ON message FIELDS conversation_id;
|
||||
DEFINE INDEX IF NOT EXISTS message_updated_at_idx ON message FIELDS updated_at; # For ORDER BY
|
||||
@@ -0,0 +1,17 @@
|
||||
# Defines the 'relates_to' edge table for KnowledgeRelationships.
|
||||
# Edges connect nodes, in this case knowledge_entity records.
|
||||
|
||||
-- Relation (edge) table: both endpoints are constrained to knowledge_entity records.
-- SCHEMAFULL: edges are restricted to the fields declared below.
DEFINE TABLE IF NOT EXISTS relates_to SCHEMAFULL TYPE RELATION FROM knowledge_entity TO knowledge_entity;
|
||||
|
||||
-- `in` / `out` are the edge endpoints; their types repeat the FROM / TO constraint above.
DEFINE FIELD IF NOT EXISTS in ON relates_to TYPE record<knowledge_entity>;
|
||||
DEFINE FIELD IF NOT EXISTS out ON relates_to TYPE record<knowledge_entity>;
|
||||
|
||||
# RelationshipMetadata is a struct, store as object
|
||||
-- metadata is a required object with three required string sub-fields.
DEFINE FIELD IF NOT EXISTS metadata ON relates_to TYPE object;
|
||||
DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;
|
||||
|
||||
# Add indexes based on query patterns (delete_relationships_by_source_id, get_knowledge_relationships)
|
||||
-- Both indexes target nested metadata sub-fields; relationship_type is not indexed.
DEFINE INDEX IF NOT EXISTS relates_to_metadata_source_id_idx ON relates_to FIELDS metadata.source_id;
|
||||
DEFINE INDEX IF NOT EXISTS relates_to_metadata_user_id_idx ON relates_to FIELDS metadata.user_id;
|
||||
@@ -0,0 +1,23 @@
|
||||
# Defines the schema for the 'scratchpad' table.
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS); SCHEMALESS, so only the fields declared
-- below are type-checked.
DEFINE TABLE IF NOT EXISTS scratchpad SCHEMALESS;
|
||||
|
||||
# Standard fields from stored_object! macro
|
||||
DEFINE FIELD IF NOT EXISTS created_at ON scratchpad TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON scratchpad TYPE datetime;
|
||||
|
||||
# Custom fields from the Scratchpad struct
|
||||
DEFINE FIELD IF NOT EXISTS user_id ON scratchpad TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS title ON scratchpad TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content ON scratchpad TYPE string;
|
||||
-- last_saved_at is a required datetime, unlike archived_at / ingested_at below.
DEFINE FIELD IF NOT EXISTS last_saved_at ON scratchpad TYPE datetime;
|
||||
-- The two flags fall back to false when no value is supplied.
DEFINE FIELD IF NOT EXISTS is_dirty ON scratchpad TYPE bool DEFAULT false;
|
||||
DEFINE FIELD IF NOT EXISTS is_archived ON scratchpad TYPE bool DEFAULT false;
|
||||
-- Optional timestamps: may be absent.
DEFINE FIELD IF NOT EXISTS archived_at ON scratchpad TYPE option<datetime>;
|
||||
DEFINE FIELD IF NOT EXISTS ingested_at ON scratchpad TYPE option<datetime>;
|
||||
|
||||
# Indexes based on query patterns
|
||||
DEFINE INDEX IF NOT EXISTS scratchpad_user_idx ON scratchpad FIELDS user_id;
|
||||
-- Composite index over (user_id, is_archived).
DEFINE INDEX IF NOT EXISTS scratchpad_user_archived_idx ON scratchpad FIELDS user_id, is_archived;
|
||||
DEFINE INDEX IF NOT EXISTS scratchpad_updated_idx ON scratchpad FIELDS updated_at;
|
||||
DEFINE INDEX IF NOT EXISTS scratchpad_archived_idx ON scratchpad FIELDS archived_at;
|
||||
@@ -0,0 +1,7 @@
|
||||
-- Bookkeeping table with one row per executed migration script (script_name, executed_at).
-- OVERWRITE (instead of the IF NOT EXISTS used by the other tables) replaces any existing
-- definition each time this script runs.
-- PERMISSIONS: select is fully allowed; create, update and delete are denied (NONE).
-- NOTE(review): table permissions normally constrain record-level users only, so
-- system-level sessions would still be able to write — confirm against the SurrealDB docs.
DEFINE TABLE OVERWRITE script_migration SCHEMAFULL
|
||||
PERMISSIONS
|
||||
FOR select FULL
|
||||
FOR create, update, delete NONE;
|
||||
|
||||
DEFINE FIELD OVERWRITE script_name ON script_migration TYPE string;
|
||||
-- executed_at is computed by the database (VALUE time::now()) and is declared READONLY.
DEFINE FIELD OVERWRITE executed_at ON script_migration TYPE datetime VALUE time::now() READONLY;
|
||||
@@ -0,0 +1,16 @@
|
||||
# Defines the schema for the 'system_settings' table.
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS); SCHEMALESS, so only the fields declared
-- below are type-checked.
DEFINE TABLE IF NOT EXISTS system_settings SCHEMALESS;
|
||||
|
||||
# Custom fields from the SystemSettings struct
|
||||
-- Feature toggles.
DEFINE FIELD IF NOT EXISTS registrations_enabled ON system_settings TYPE bool;
|
||||
DEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;
|
||||
-- Model identifiers, all stored as strings.
DEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS image_processing_model ON system_settings TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS embedding_model ON system_settings TYPE string;
|
||||
-- Embedding vector size as an integer.
-- NOTE(review): the commented-out HNSW index definitions in the embedding schemas hard-code
-- DIMENSION 1536; presumably this setting supersedes that value — confirm.
DEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;
|
||||
-- Prompt texts, all stored as strings.
DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS voice_processing_model ON system_settings TYPE string;
|
||||
@@ -0,0 +1,17 @@
|
||||
# Defines the schema for the 'text_chunk' table.
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS); SCHEMALESS, so only the fields declared
-- below are type-checked.
DEFINE TABLE IF NOT EXISTS text_chunk SCHEMALESS;
|
||||
|
||||
# Standard fields
|
||||
DEFINE FIELD IF NOT EXISTS created_at ON text_chunk TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON text_chunk TYPE datetime;
|
||||
|
||||
# Custom fields from the TextChunk struct
|
||||
DEFINE FIELD IF NOT EXISTS source_id ON text_chunk TYPE string;
|
||||
-- `chunk` holds the chunk text as a string; no embedding field is declared on this table.
DEFINE FIELD IF NOT EXISTS chunk ON text_chunk TYPE string;
|
||||
|
||||
DEFINE FIELD IF NOT EXISTS user_id ON text_chunk TYPE string;
|
||||
|
||||
# Indexes based on build_indexes and query patterns (delete_by_source_id)
|
||||
DEFINE INDEX IF NOT EXISTS text_chunk_source_id_idx ON text_chunk FIELDS source_id;
|
||||
DEFINE INDEX IF NOT EXISTS text_chunk_user_id_idx ON text_chunk FIELDS user_id;
|
||||
@@ -0,0 +1,20 @@
|
||||
-- Defines the schema for the 'text_chunk_embedding' table.
|
||||
-- Separate table to optimize HNSW index creation memory usage
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS). SCHEMAFULL: records are restricted to the
-- fields declared for this table.
DEFINE TABLE IF NOT EXISTS text_chunk_embedding SCHEMAFULL;
|
||||
|
||||
-- Standard fields
|
||||
-- Timestamps plus owner / source identifiers; the identifiers are plain strings.
DEFINE FIELD IF NOT EXISTS created_at ON text_chunk_embedding TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON text_chunk_embedding TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS user_id ON text_chunk_embedding TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS source_id ON text_chunk_embedding TYPE string;
|
||||
|
||||
-- Custom fields
|
||||
-- chunk_id is a record link constrained to the text_chunk table.
DEFINE FIELD IF NOT EXISTS chunk_id ON text_chunk_embedding TYPE record<text_chunk>;
|
||||
-- The vector is an array of floats; no fixed length is declared at the field level.
DEFINE FIELD IF NOT EXISTS embedding ON text_chunk_embedding TYPE array<float>;
|
||||
|
||||
-- Indexes
|
||||
-- DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;
|
||||
-- NOTE(review): the HNSW statement above is commented out, so this script creates no
-- vector index; presumably it is built elsewhere with a configurable dimension — confirm.
-- UNIQUE on chunk_id: at most one embedding row per text_chunk record.
DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id UNIQUE;
|
||||
DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;
|
||||
DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;
|
||||
@@ -0,0 +1,36 @@
|
||||
# Defines the schema for the 'text_content' table.
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS). SCHEMAFULL: records are restricted to the
-- fields declared below, including the nested file_info.* and url_info.* sub-fields.
DEFINE TABLE IF NOT EXISTS text_content SCHEMAFULL;
|
||||
|
||||
# Standard fields
|
||||
DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON text_content TYPE datetime;
|
||||
|
||||
# Custom fields from the TextContent struct
|
||||
DEFINE FIELD IF NOT EXISTS text ON text_content TYPE string;
|
||||
# FileInfo is a struct, store as object
|
||||
-- Optional object; its sub-fields are declared in the "FileInfo fields" group further down.
DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;
|
||||
# UrlInfo is a struct, store as object
|
||||
-- Optional object with three string sub-fields.
-- NOTE(review): url_info.* and file_info.* are declared as non-optional types under
-- optional parents — confirm that records with the parent absent validate on the
-- SurrealDB version in use.
DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;
|
||||
DEFINE FIELD IF NOT EXISTS url_info.url ON text_content TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS url_info.title ON text_content TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS url_info.image_id ON text_content TYPE string;
|
||||
|
||||
-- context is optional; category and user_id are required strings.
DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;
|
||||
DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;
|
||||
|
||||
# FileInfo fields
|
||||
-- Same field names and types as the standalone `file` table, plus an `id` string.
DEFINE FIELD IF NOT EXISTS file_info.id ON text_content TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS file_info.created_at ON text_content TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS file_info.updated_at ON text_content TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS file_info.sha256 ON text_content TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS file_info.path ON text_content TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS file_info.file_name ON text_content TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS file_info.mime_type ON text_content TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS file_info.user_id ON text_content TYPE string;
|
||||
|
||||
# Indexes based on query patterns
|
||||
DEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;
|
||||
DEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;
|
||||
DEFINE INDEX IF NOT EXISTS text_content_category_idx ON text_content FIELDS category;
|
||||
@@ -0,0 +1,20 @@
|
||||
# Defines the schema for the 'user' table.
|
||||
# NOTE: Authentication scope and access rules are defined in auth.surql
|
||||
|
||||
-- Created only when absent (IF NOT EXISTS); SCHEMALESS, so only the fields declared
-- below are type-checked.
DEFINE TABLE IF NOT EXISTS user SCHEMALESS;
|
||||
|
||||
# Standard fields
|
||||
DEFINE FIELD IF NOT EXISTS created_at ON user TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON user TYPE datetime;
|
||||
|
||||
# Custom fields from the User struct
|
||||
DEFINE FIELD IF NOT EXISTS email ON user TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS password ON user TYPE string; # Stores the hashed password
|
||||
DEFINE FIELD IF NOT EXISTS anonymous ON user TYPE bool;
|
||||
-- api_key is the only optional field on this table.
DEFINE FIELD IF NOT EXISTS api_key ON user TYPE option<string>;
|
||||
DEFINE FIELD IF NOT EXISTS admin ON user TYPE bool;
|
||||
DEFINE FIELD IF NOT EXISTS timezone ON user TYPE string;
|
||||
|
||||
# Indexes based on query patterns (find_by_email, find_by_api_key, unique constraint from setup_auth)
|
||||
-- UNIQUE: no two user records may share an email.
DEFINE INDEX IF NOT EXISTS user_email_idx ON user FIELDS email UNIQUE;
|
||||
-- Not declared UNIQUE, so this index alone does not prevent duplicate api_key values.
DEFINE INDEX IF NOT EXISTS user_api_key_idx ON user FIELDS api_key;
|
||||
Reference in New Issue
Block a user