From 30b8a65377d66d1fcc188ecfb33a7df94d2c6eb3 Mon Sep 17 00:00:00 2001 From: Per Stark Date: Mon, 22 Dec 2025 08:04:50 +0100 Subject: [PATCH] fix: migrations schemafull --- ...20_000001_remove_legacy_hnsw_indexes.surql | 18 +++++ ...251121_113122_migrate_embedding_data.surql | 23 ++++++ .../20251231_enforce_schemafull.surql | 70 +++++++++++++++++++ ...151002_remove_legacy_embedding_fields.json | 1 - ..._embedding_backend_to_system_settings.json | 1 + common/migrations/definitions/_initial.json | 2 +- common/src/storage/types/text_content.rs | 3 + 7 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 common/migrations/20251120_000001_remove_legacy_hnsw_indexes.surql create mode 100644 common/migrations/20251121_113122_migrate_embedding_data.surql create mode 100644 common/migrations/20251231_enforce_schemafull.surql delete mode 100644 common/migrations/definitions/20251122_151002_remove_legacy_embedding_fields.json create mode 100644 common/migrations/definitions/20251210_add_embedding_backend_to_system_settings.json diff --git a/common/migrations/20251120_000001_remove_legacy_hnsw_indexes.surql b/common/migrations/20251120_000001_remove_legacy_hnsw_indexes.surql new file mode 100644 index 0000000..e61c559 --- /dev/null +++ b/common/migrations/20251120_000001_remove_legacy_hnsw_indexes.surql @@ -0,0 +1,18 @@ +-- Remove HNSW indexes from base tables (now created at runtime on *_embedding tables) +REMOVE INDEX IF EXISTS idx_embedding_entities ON knowledge_entity; +REMOVE INDEX IF EXISTS idx_embedding_chunks ON text_chunk; + +-- Remove FTS indexes (now created at runtime via indexes.rs) +REMOVE INDEX IF EXISTS text_content_fts_text_idx ON text_content; +REMOVE INDEX IF EXISTS text_content_fts_category_idx ON text_content; +REMOVE INDEX IF EXISTS text_content_fts_context_idx ON text_content; +REMOVE INDEX IF EXISTS text_content_fts_file_name_idx ON text_content; +REMOVE INDEX IF EXISTS text_content_fts_url_idx ON text_content; +REMOVE INDEX IF EXISTS text_content_fts_url_title_idx ON text_content; +REMOVE INDEX IF EXISTS knowledge_entity_fts_name_idx ON knowledge_entity; +REMOVE INDEX IF EXISTS knowledge_entity_fts_description_idx ON knowledge_entity; +REMOVE INDEX IF EXISTS text_chunk_fts_chunk_idx ON text_chunk; + +-- Remove legacy analyzers (recreated at runtime with updated configuration) +REMOVE ANALYZER IF EXISTS app_default_fts_analyzer; +REMOVE ANALYZER IF EXISTS app_en_fts_analyzer; diff --git a/common/migrations/20251121_113122_migrate_embedding_data.surql b/common/migrations/20251121_113122_migrate_embedding_data.surql new file mode 100644 index 0000000..9b92430 --- /dev/null +++ b/common/migrations/20251121_113122_migrate_embedding_data.surql @@ -0,0 +1,23 @@ +-- Copy embeddings from base tables to dedicated tables +-- This runs BEFORE the field removal migration + +FOR $chunk IN (SELECT * FROM text_chunk WHERE embedding != NONE AND array::len(embedding) > 0) { + CREATE text_chunk_embedding CONTENT { + chunk_id: $chunk.id, + embedding: $chunk.embedding, + user_id: $chunk.user_id, + source_id: $chunk.source_id, + created_at: $chunk.created_at, + updated_at: $chunk.updated_at + }; +}; + +FOR $entity IN (SELECT * FROM knowledge_entity WHERE embedding != NONE AND array::len(embedding) > 0) { + CREATE knowledge_entity_embedding CONTENT { + entity_id: $entity.id, + embedding: $entity.embedding, + user_id: $entity.user_id, + created_at: $entity.created_at, + updated_at: $entity.updated_at + }; +}; diff --git a/common/migrations/20251231_enforce_schemafull.surql b/common/migrations/20251231_enforce_schemafull.surql new file mode 100644 index 0000000..f957f38 --- /dev/null +++ b/common/migrations/20251231_enforce_schemafull.surql @@ -0,0 +1,70 @@ +-- Enforce SCHEMAFULL on all tables and define missing fields + +-- 1. Define missing fields for ingestion_task (formerly job, but now ingestion_task) +DEFINE TABLE OVERWRITE ingestion_task SCHEMAFULL; + +-- Core Fields +DEFINE FIELD IF NOT EXISTS id ON ingestion_task TYPE record; +DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE datetime DEFAULT time::now(); +DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE datetime DEFAULT time::now(); +DEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string; + +-- State Machine Fields +DEFINE FIELD IF NOT EXISTS state ON ingestion_task TYPE string ASSERT $value IN ['Pending', 'Reserved', 'Processing', 'Succeeded', 'Failed', 'Cancelled', 'DeadLetter']; +DEFINE FIELD IF NOT EXISTS attempts ON ingestion_task TYPE int DEFAULT 0; +DEFINE FIELD IF NOT EXISTS max_attempts ON ingestion_task TYPE int DEFAULT 3; +DEFINE FIELD IF NOT EXISTS scheduled_at ON ingestion_task TYPE datetime DEFAULT time::now(); +DEFINE FIELD IF NOT EXISTS locked_at ON ingestion_task TYPE option; +DEFINE FIELD IF NOT EXISTS lease_duration_secs ON ingestion_task TYPE int DEFAULT 300; +DEFINE FIELD IF NOT EXISTS worker_id ON ingestion_task TYPE option; +DEFINE FIELD IF NOT EXISTS error_code ON ingestion_task TYPE option; +DEFINE FIELD IF NOT EXISTS error_message ON ingestion_task TYPE option; +DEFINE FIELD IF NOT EXISTS last_error_at ON ingestion_task TYPE option; +DEFINE FIELD IF NOT EXISTS priority ON ingestion_task TYPE int DEFAULT 0; + +-- Content Payload (IngestionPayload Enum) +DEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object; +DEFINE FIELD IF NOT EXISTS content.Url ON ingestion_task TYPE option; +DEFINE FIELD IF NOT EXISTS content.Text ON ingestion_task TYPE option; +DEFINE FIELD IF NOT EXISTS content.File ON ingestion_task TYPE option; + +-- Content: Url Variant +DEFINE FIELD IF NOT EXISTS content.Url.url ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.Url.context ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.Url.category ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.Url.user_id ON ingestion_task TYPE string; + +-- Content: Text Variant +DEFINE FIELD IF NOT EXISTS content.Text.text ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.Text.context ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.Text.category ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.Text.user_id ON ingestion_task TYPE string; + +-- Content: File Variant +DEFINE FIELD IF NOT EXISTS content.File.context ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.File.category ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.File.user_id ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.File.file_info ON ingestion_task TYPE object; + +-- Content: File.file_info (FileInfo Struct) +DEFINE FIELD IF NOT EXISTS content.File.file_info.id ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.File.file_info.created_at ON ingestion_task TYPE datetime; +DEFINE FIELD IF NOT EXISTS content.File.file_info.updated_at ON ingestion_task TYPE datetime; +DEFINE FIELD IF NOT EXISTS content.File.file_info.sha256 ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.File.file_info.path ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.File.file_info.file_name ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.File.file_info.mime_type ON ingestion_task TYPE string; +DEFINE FIELD IF NOT EXISTS content.File.file_info.user_id ON ingestion_task TYPE string; + +-- 2. Enforce SCHEMAFULL on all other tables +DEFINE TABLE OVERWRITE analytics SCHEMAFULL; +DEFINE TABLE OVERWRITE conversation SCHEMAFULL; +DEFINE TABLE OVERWRITE file SCHEMAFULL; +DEFINE TABLE OVERWRITE knowledge_entity SCHEMAFULL; +DEFINE TABLE OVERWRITE message SCHEMAFULL; +DEFINE TABLE OVERWRITE relates_to SCHEMAFULL; +DEFINE TABLE OVERWRITE scratchpad SCHEMAFULL; +DEFINE TABLE OVERWRITE system_settings SCHEMAFULL; +DEFINE TABLE OVERWRITE text_chunk SCHEMAFULL; +DEFINE TABLE OVERWRITE text_content SCHEMAFULL; +DEFINE TABLE OVERWRITE user SCHEMAFULL; diff --git a/common/migrations/definitions/20251122_151002_remove_legacy_embedding_fields.json b/common/migrations/definitions/20251122_151002_remove_legacy_embedding_fields.json deleted file mode 100644 index 07655a8..0000000 --- a/common/migrations/definitions/20251122_151002_remove_legacy_embedding_fields.json +++ /dev/null @@ -1 +0,0 @@ -{"schemas":"--- original\n+++ modified\n@@ -85,31 +85,30 @@\n\n DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity TYPE string;\n\n-# Indexes based on build_indexes and query patterns\n-# The INDEX definition correctly specifies the vector properties\n-# HNSW index now defined on knowledge_entity_embedding table for better memory usage \n-# DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\n+-- Indexes based on build_indexes and query patterns\n+-- HNSW index now defined on knowledge_entity_embedding table for better memory usage\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at;\n\n-# Defines the schema for the 'knowledge_entity_embedding' table.\n-# Separate table to optimize HNSW index creation memory usage\n+-- Defines the schema for the 'knowledge_entity_embedding' table.\n+-- Separate table to optimize HNSW index creation memory usage\n\n DEFINE TABLE IF NOT EXISTS knowledge_entity_embedding SCHEMAFULL;\n\n-# Standard fields\n+-- Standard fields\n DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity_embedding TYPE datetime;\n DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity_embedding TYPE datetime;\n DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity_embedding TYPE string;\n\n-# Custom fields\n+-- Custom fields\n DEFINE FIELD IF NOT EXISTS entity_id ON knowledge_entity_embedding TYPE record;\n DEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity_embedding TYPE array;\n\n-# Indexes\n-# DEFINE INDEX IF NOT EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;\n+-- Indexes\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_user_id_idx ON knowledge_entity_embedding FIELDS user_id;\n\n@@ -220,8 +219,8 @@\n DEFINE INDEX IF NOT EXISTS text_chunk_source_id_idx ON text_chunk FIELDS source_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_user_id_idx ON text_chunk FIELDS user_id;\n\n-# Defines the schema for the 'text_chunk_embedding' table.\n-# Separate table to optimize HNSW index creation memory usage\n+-- Defines the schema for the 'text_chunk_embedding' table.\n+-- Separate table to optimize HNSW index creation memory usage\n\n DEFINE TABLE IF NOT EXISTS text_chunk_embedding SCHEMAFULL;\n\n@@ -235,8 +234,8 @@\n DEFINE FIELD IF NOT EXISTS chunk_id ON text_chunk_embedding TYPE record;\n DEFINE FIELD IF NOT EXISTS embedding ON text_chunk_embedding TYPE array;\n\n-# Indexes\n-# DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\n+-- Indexes\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;\n","events":null} \ No newline at end of file diff --git a/common/migrations/definitions/20251210_add_embedding_backend_to_system_settings.json b/common/migrations/definitions/20251210_add_embedding_backend_to_system_settings.json new file mode 100644 index 0000000..f0f1430 --- /dev/null +++ b/common/migrations/definitions/20251210_add_embedding_backend_to_system_settings.json @@ -0,0 +1 @@ +{"schemas":"--- original\n+++ modified\n@@ -18,8 +18,8 @@\n DEFINE TABLE IF NOT EXISTS conversation SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON conversation TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON conversation TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON conversation TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON conversation TYPE datetime;\n\n # Custom fields from the Conversation struct\n DEFINE FIELD IF NOT EXISTS user_id ON conversation TYPE string;\n@@ -34,8 +34,8 @@\n DEFINE TABLE IF NOT EXISTS file SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON file TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON file TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON file TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON file TYPE datetime;\n\n # Custom fields from the FileInfo struct\n DEFINE FIELD IF NOT EXISTS sha256 ON file TYPE string;\n@@ -54,13 +54,10 @@\n DEFINE TABLE IF NOT EXISTS ingestion_task SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE datetime;\n\n-# Custom fields from the IngestionTask struct\n-# IngestionPayload is complex, store as object\n DEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object;\n-# IngestionTaskStatus can hold data (InProgress), store as object\n DEFINE FIELD IF NOT EXISTS status ON ingestion_task TYPE object;\n DEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string;\n\n@@ -74,8 +71,8 @@\n DEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity TYPE datetime;\n\n # Custom fields from the KnowledgeEntity struct\n DEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity TYPE string;\n@@ -86,27 +83,42 @@\n # metadata is Option, store as object\n DEFINE FIELD IF NOT EXISTS metadata ON knowledge_entity TYPE option;\n\n-# Define embedding as a standard array of floats for schema definition\n-DEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity TYPE array;\n-# The specific vector nature is handled by the index definition below\n-\n DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity TYPE string;\n\n-# Indexes based on build_indexes and query patterns\n-# The INDEX definition correctly specifies the vector properties\n-DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\n-DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;\n+-- Indexes based on build_indexes and query patterns\n+-- HNSW index now defined on knowledge_entity_embedding table for better memory usage\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;\n+DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;\n-DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at; # For get_latest_knowledge_entities\n+DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at;\n+\n+-- Defines the schema for the 'knowledge_entity_embedding' table.\n+-- Separate table to optimize HNSW index creation memory usage\n+\n+DEFINE TABLE IF NOT EXISTS knowledge_entity_embedding SCHEMAFULL;\n+\n+-- Standard fields\n+DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity_embedding TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity_embedding TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity_embedding TYPE string;\n+\n+-- Custom fields\n+DEFINE FIELD IF NOT EXISTS entity_id ON knowledge_entity_embedding TYPE record;\n+DEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity_embedding TYPE array;\n\n+-- Indexes\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;\n+DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id;\n+DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_user_id_idx ON knowledge_entity_embedding FIELDS user_id;\n+\n # Defines the schema for the 'message' table.\n\n DEFINE TABLE IF NOT EXISTS message SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON message TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON message TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON message TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON message TYPE datetime;\n\n # Custom fields from the Message struct\n DEFINE FIELD IF NOT EXISTS conversation_id ON message TYPE string;\n@@ -140,6 +152,30 @@\n DEFINE INDEX IF NOT EXISTS relates_to_metadata_source_id_idx ON relates_to FIELDS metadata.source_id;\n DEFINE INDEX IF NOT EXISTS relates_to_metadata_user_id_idx ON relates_to FIELDS metadata.user_id;\n\n+# Defines the schema for the 'scratchpad' table.\n+\n+DEFINE TABLE IF NOT EXISTS scratchpad SCHEMALESS;\n+\n+# Standard fields from stored_object! macro\n+DEFINE FIELD IF NOT EXISTS created_at ON scratchpad TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON scratchpad TYPE datetime;\n+\n+# Custom fields from the Scratchpad struct\n+DEFINE FIELD IF NOT EXISTS user_id ON scratchpad TYPE string;\n+DEFINE FIELD IF NOT EXISTS title ON scratchpad TYPE string;\n+DEFINE FIELD IF NOT EXISTS content ON scratchpad TYPE string;\n+DEFINE FIELD IF NOT EXISTS last_saved_at ON scratchpad TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS is_dirty ON scratchpad TYPE bool DEFAULT false;\n+DEFINE FIELD IF NOT EXISTS is_archived ON scratchpad TYPE bool DEFAULT false;\n+DEFINE FIELD IF NOT EXISTS archived_at ON scratchpad TYPE option;\n+DEFINE FIELD IF NOT EXISTS ingested_at ON scratchpad TYPE option;\n+\n+# Indexes based on query patterns\n+DEFINE INDEX IF NOT EXISTS scratchpad_user_idx ON scratchpad FIELDS user_id;\n+DEFINE INDEX IF NOT EXISTS scratchpad_user_archived_idx ON scratchpad FIELDS user_id, is_archived;\n+DEFINE INDEX IF NOT EXISTS scratchpad_updated_idx ON scratchpad FIELDS updated_at;\n+DEFINE INDEX IF NOT EXISTS scratchpad_archived_idx ON scratchpad FIELDS archived_at;\n+\n DEFINE TABLE OVERWRITE script_migration SCHEMAFULL\n PERMISSIONS\n FOR select FULL\n@@ -157,8 +193,13 @@\n DEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;\n DEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS image_processing_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS embedding_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;\n DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS voice_processing_model ON system_settings TYPE string;\n\n # Defines the schema for the 'text_chunk' table.\n\n@@ -165,32 +206,47 @@\n DEFINE TABLE IF NOT EXISTS text_chunk SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON text_chunk TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON text_chunk TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON text_chunk TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON text_chunk TYPE datetime;\n\n # Custom fields from the TextChunk struct\n DEFINE FIELD IF NOT EXISTS source_id ON text_chunk TYPE string;\n DEFINE FIELD IF NOT EXISTS chunk ON text_chunk TYPE string;\n\n-# Define embedding as a standard array of floats for schema definition\n-DEFINE FIELD IF NOT EXISTS embedding ON text_chunk TYPE array;\n-# The specific vector nature is handled by the index definition below\n-\n DEFINE FIELD IF NOT EXISTS user_id ON text_chunk TYPE string;\n\n # Indexes based on build_indexes and query patterns (delete_by_source_id)\n-# The INDEX definition correctly specifies the vector properties\n-DEFINE INDEX IF NOT EXISTS idx_embedding_chunks ON text_chunk FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS text_chunk_source_id_idx ON text_chunk FIELDS source_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_user_id_idx ON text_chunk FIELDS user_id;\n\n+-- Defines the schema for the 'text_chunk_embedding' table.\n+-- Separate table to optimize HNSW index creation memory usage\n+\n+DEFINE TABLE IF NOT EXISTS text_chunk_embedding SCHEMAFULL;\n+\n+# Standard fields\n+DEFINE FIELD IF NOT EXISTS created_at ON text_chunk_embedding TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON text_chunk_embedding TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS user_id ON text_chunk_embedding TYPE string;\n+DEFINE FIELD IF NOT EXISTS source_id ON text_chunk_embedding TYPE string;\n+\n+# Custom fields\n+DEFINE FIELD IF NOT EXISTS chunk_id ON text_chunk_embedding TYPE record;\n+DEFINE FIELD IF NOT EXISTS embedding ON text_chunk_embedding TYPE array;\n+\n+-- Indexes\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\n+DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id;\n+DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;\n+DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;\n+\n # Defines the schema for the 'text_content' table.\n\n DEFINE TABLE IF NOT EXISTS text_content SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON text_content TYPE datetime;\n\n # Custom fields from the TextContent struct\n DEFINE FIELD IF NOT EXISTS text ON text_content TYPE string;\n@@ -213,8 +269,8 @@\n DEFINE TABLE IF NOT EXISTS user SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON user TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON user TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON user TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON user TYPE datetime;\n\n # Custom fields from the User struct\n DEFINE FIELD IF NOT EXISTS email ON user TYPE string;\n","events":null} \ No newline at end of file diff --git a/common/migrations/definitions/_initial.json b/common/migrations/definitions/_initial.json index 7c40dd1..f6c0308 100644 --- a/common/migrations/definitions/_initial.json +++ b/common/migrations/definitions/_initial.json @@ -1 +1 @@ -{"schemas":"# Defines the schema for the 'analytics' table.\n\nDEFINE TABLE IF NOT EXISTS analytics SCHEMALESS;\n\n# Custom fields from the Analytics struct\nDEFINE FIELD IF NOT EXISTS page_loads ON analytics TYPE number;\nDEFINE FIELD IF NOT EXISTS visitors ON analytics TYPE number;\n\n# Defines authentication scope and access rules.\n# This mirrors the logic previously in SurrealDbClient::setup_auth\n\nDEFINE ACCESS IF NOT EXISTS account ON DATABASE TYPE RECORD\n SIGNUP ( CREATE user SET email = $email, password = crypto::argon2::generate($password), anonymous = false, user_id = $user_id) # Ensure user_id is provided if needed\n SIGNIN ( SELECT * FROM user WHERE email = $email AND crypto::argon2::compare(password, $password) );\n\n# Defines the schema for the 'conversation' table.\n\nDEFINE TABLE IF NOT EXISTS conversation SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON conversation TYPE datetime;\nDEFINE FIELD IF NOT EXISTS updated_at ON conversation TYPE datetime;\n\n# Custom fields from the Conversation struct\nDEFINE FIELD IF NOT EXISTS user_id ON conversation TYPE string;\nDEFINE FIELD IF NOT EXISTS title ON conversation TYPE string;\n\n# Add indexes based on query patterns (get_complete_conversation ownership check, get_user_conversations)\nDEFINE INDEX IF NOT EXISTS conversation_user_id_idx ON conversation FIELDS user_id;\nDEFINE INDEX IF NOT EXISTS conversation_created_at_idx ON conversation FIELDS created_at; # For get_user_conversations ORDER BY\n\n# Defines the schema for the 'file' table (used by FileInfo).\n\nDEFINE TABLE IF NOT EXISTS file SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON file TYPE datetime;\nDEFINE FIELD IF NOT EXISTS updated_at ON file TYPE datetime;\n\n# Custom fields from the FileInfo struct\nDEFINE FIELD IF NOT EXISTS sha256 ON file TYPE string;\nDEFINE FIELD IF NOT EXISTS path ON file TYPE string;\nDEFINE FIELD IF NOT EXISTS file_name ON file TYPE string;\nDEFINE FIELD IF NOT EXISTS mime_type ON file TYPE string;\nDEFINE FIELD IF NOT EXISTS user_id ON file TYPE string;\n\n# Indexes based on usage (get_by_sha, potentially user lookups)\n# Using UNIQUE based on the logic in FileInfo::new to prevent duplicates\nDEFINE INDEX IF NOT EXISTS file_sha256_idx ON file FIELDS sha256 UNIQUE;\nDEFINE INDEX IF NOT EXISTS file_user_id_idx ON file FIELDS user_id;\n\n# Defines the schema for the 'ingestion_task' table (used by IngestionTask).\n\nDEFINE TABLE IF NOT EXISTS ingestion_task SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE datetime;\nDEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE datetime;\n\nDEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object;\nDEFINE FIELD IF NOT EXISTS status ON ingestion_task TYPE object;\nDEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string;\n\n# Indexes explicitly defined in build_indexes and useful for get_unfinished_tasks\nDEFINE INDEX IF NOT EXISTS idx_ingestion_task_status ON ingestion_task FIELDS status;\nDEFINE INDEX IF NOT EXISTS idx_ingestion_task_user ON ingestion_task FIELDS user_id;\nDEFINE INDEX IF NOT EXISTS idx_ingestion_task_created ON ingestion_task FIELDS created_at;\n\n# Defines the schema for the 'knowledge_entity' table.\n\nDEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity TYPE datetime;\nDEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity TYPE datetime;\n\n# Custom fields from the KnowledgeEntity struct\nDEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity TYPE string;\nDEFINE FIELD IF NOT EXISTS name ON knowledge_entity TYPE string;\nDEFINE FIELD IF NOT EXISTS description ON knowledge_entity TYPE string;\n# KnowledgeEntityType is an enum, store as string\nDEFINE FIELD IF NOT EXISTS entity_type ON knowledge_entity TYPE string;\n# metadata is Option, store as object\nDEFINE FIELD IF NOT EXISTS metadata ON knowledge_entity TYPE option;\n\nDEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity TYPE string;\n\n# Indexes based on build_indexes and query patterns\n# The INDEX definition correctly specifies the vector properties\n# HNSW index now defined on knowledge_entity_embedding table for better memory usage \n# DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\nDEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;\nDEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;\nDEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;\nDEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at;\n\n# Defines the schema for the 'knowledge_entity_embedding' table.\n# Separate table to optimize HNSW index creation memory usage\n\nDEFINE TABLE IF NOT EXISTS knowledge_entity_embedding SCHEMAFULL;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity_embedding TYPE datetime;\nDEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity_embedding TYPE datetime;\nDEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity_embedding TYPE string;\n\n# Custom fields\nDEFINE FIELD IF NOT EXISTS entity_id ON knowledge_entity_embedding TYPE record;\nDEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity_embedding TYPE array;\n\n# Indexes\n# DEFINE INDEX IF NOT EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;\nDEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id;\nDEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_user_id_idx ON knowledge_entity_embedding FIELDS user_id;\n\n# Defines the schema for the 'message' table.\n\nDEFINE TABLE IF NOT EXISTS message SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON message TYPE datetime;\nDEFINE FIELD IF NOT EXISTS updated_at ON message TYPE datetime;\n\n# Custom fields from the Message struct\nDEFINE FIELD IF NOT EXISTS conversation_id ON message TYPE string;\n# MessageRole is an enum, store as string\nDEFINE FIELD IF NOT EXISTS role ON message TYPE string;\nDEFINE FIELD IF NOT EXISTS content ON message TYPE string;\n# references is Option>, store as array\nDEFINE FIELD IF NOT EXISTS references ON message TYPE option>;\n\n# Indexes based on query patterns (get_complete_conversation)\nDEFINE INDEX IF NOT EXISTS message_conversation_id_idx ON message FIELDS conversation_id;\nDEFINE INDEX IF NOT EXISTS message_updated_at_idx ON message FIELDS updated_at; # For ORDER BY\n\n# Defines the 'relates_to' edge table for KnowledgeRelationships.\n# Edges connect nodes, in this case knowledge_entity records.\n\n# Define the edge table itself, enforcing connections between knowledge_entity records\n# SCHEMAFULL requires all fields to be defined, maybe start with SCHEMALESS if metadata might vary\nDEFINE TABLE IF NOT EXISTS relates_to SCHEMALESS TYPE RELATION FROM knowledge_entity TO knowledge_entity;\n\n# Define the metadata field within the edge\n# RelationshipMetadata is a struct, store as object\nDEFINE FIELD IF NOT EXISTS metadata ON relates_to TYPE object;\n\n# Optionally, define fields within the metadata object for stricter schema (requires SCHEMAFULL on table)\n# DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;\n# DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;\n# DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;\n\n# Add indexes based on query patterns (delete_relationships_by_source_id, get_knowledge_relationships)\nDEFINE INDEX IF NOT EXISTS relates_to_metadata_source_id_idx ON relates_to FIELDS metadata.source_id;\nDEFINE INDEX IF NOT EXISTS relates_to_metadata_user_id_idx ON relates_to FIELDS metadata.user_id;\n\n# Defines the schema for the 'scratchpad' table.\n\nDEFINE TABLE IF NOT EXISTS scratchpad SCHEMALESS;\n\n# Standard fields from stored_object! macro\nDEFINE FIELD IF NOT EXISTS created_at ON scratchpad TYPE datetime;\nDEFINE FIELD IF NOT EXISTS updated_at ON scratchpad TYPE datetime;\n\n# Custom fields from the Scratchpad struct\nDEFINE FIELD IF NOT EXISTS user_id ON scratchpad TYPE string;\nDEFINE FIELD IF NOT EXISTS title ON scratchpad TYPE string;\nDEFINE FIELD IF NOT EXISTS content ON scratchpad TYPE string;\nDEFINE FIELD IF NOT EXISTS last_saved_at ON scratchpad TYPE datetime;\nDEFINE FIELD IF NOT EXISTS is_dirty ON scratchpad TYPE bool DEFAULT false;\nDEFINE FIELD IF NOT EXISTS is_archived ON scratchpad TYPE bool DEFAULT false;\nDEFINE FIELD IF NOT EXISTS archived_at ON scratchpad TYPE option;\nDEFINE FIELD IF NOT EXISTS ingested_at ON scratchpad TYPE option;\n\n# Indexes based on query patterns\nDEFINE INDEX IF NOT EXISTS scratchpad_user_idx ON scratchpad FIELDS user_id;\nDEFINE INDEX IF NOT EXISTS scratchpad_user_archived_idx ON scratchpad FIELDS user_id, is_archived;\nDEFINE INDEX IF NOT EXISTS scratchpad_updated_idx ON scratchpad FIELDS updated_at;\nDEFINE INDEX IF NOT EXISTS scratchpad_archived_idx ON scratchpad FIELDS archived_at;\n\nDEFINE TABLE OVERWRITE script_migration SCHEMAFULL\n PERMISSIONS\n FOR select FULL\n FOR create, update, delete NONE;\n\nDEFINE FIELD OVERWRITE script_name ON script_migration TYPE string;\nDEFINE FIELD OVERWRITE executed_at ON script_migration TYPE datetime VALUE time::now() READONLY;\n\n# Defines the schema for the 'system_settings' table.\n\nDEFINE TABLE IF NOT EXISTS system_settings SCHEMALESS;\n\n# Custom fields from the SystemSettings struct\nDEFINE FIELD IF NOT EXISTS registrations_enabled ON system_settings TYPE bool;\nDEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;\nDEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;\nDEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;\nDEFINE FIELD IF NOT EXISTS image_processing_model ON system_settings TYPE string;\nDEFINE FIELD IF NOT EXISTS embedding_model ON system_settings TYPE string;\nDEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;\nDEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\nDEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\nDEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;\nDEFINE FIELD IF NOT EXISTS voice_processing_model ON system_settings TYPE string;\n\n# Defines the schema for the 'text_chunk' table.\n\nDEFINE TABLE IF NOT EXISTS text_chunk SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON text_chunk TYPE datetime;\nDEFINE FIELD IF NOT EXISTS updated_at ON text_chunk TYPE datetime;\n\n# Custom fields from the TextChunk struct\nDEFINE FIELD IF NOT EXISTS source_id ON text_chunk TYPE string;\nDEFINE FIELD IF NOT EXISTS chunk ON text_chunk TYPE string;\n\nDEFINE FIELD IF NOT EXISTS user_id ON text_chunk TYPE string;\n\n# Indexes based on build_indexes and query patterns (delete_by_source_id)\nDEFINE INDEX IF NOT EXISTS text_chunk_source_id_idx ON text_chunk FIELDS source_id;\nDEFINE INDEX IF NOT EXISTS text_chunk_user_id_idx ON text_chunk FIELDS user_id;\n\n# Defines the schema for the 'text_chunk_embedding' table.\n# Separate table to optimize HNSW index creation memory usage\n\nDEFINE TABLE IF NOT EXISTS text_chunk_embedding SCHEMAFULL;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON text_chunk_embedding TYPE datetime;\nDEFINE FIELD IF NOT EXISTS updated_at ON text_chunk_embedding TYPE datetime;\nDEFINE FIELD IF NOT EXISTS user_id ON text_chunk_embedding TYPE string;\nDEFINE FIELD IF NOT EXISTS source_id ON text_chunk_embedding TYPE string;\n\n# Custom fields\nDEFINE FIELD IF NOT EXISTS chunk_id ON text_chunk_embedding TYPE record;\nDEFINE FIELD IF NOT EXISTS embedding ON text_chunk_embedding TYPE array;\n\n# Indexes\n# DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\nDEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id;\nDEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;\nDEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;\n\n# Defines the schema for the 'text_content' table.\n\nDEFINE TABLE IF NOT EXISTS text_content SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;\nDEFINE FIELD IF NOT EXISTS updated_at ON text_content TYPE datetime;\n\n# Custom fields from the TextContent struct\nDEFINE FIELD IF NOT EXISTS text ON text_content TYPE string;\n# FileInfo is a struct, store as object\nDEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option;\n# UrlInfo is a struct, store as object\nDEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option;\nDEFINE FIELD IF NOT EXISTS context ON text_content TYPE option;\nDEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;\nDEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;\n\n# Indexes based on query patterns\nDEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;\nDEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;\nDEFINE INDEX IF NOT EXISTS text_content_category_idx ON text_content FIELDS category;\n\n# Defines the schema for the 'user' table.\n# NOTE: Authentication scope and access rules are defined in auth.surql\n\nDEFINE TABLE IF NOT EXISTS user SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON user TYPE datetime;\nDEFINE FIELD IF NOT EXISTS updated_at ON user TYPE datetime;\n\n# Custom fields from the User struct\nDEFINE FIELD IF NOT EXISTS email ON user TYPE string;\nDEFINE FIELD IF NOT EXISTS password ON user TYPE string; # Stores the hashed password\nDEFINE FIELD IF NOT EXISTS anonymous ON user TYPE bool;\nDEFINE FIELD IF NOT EXISTS api_key ON user TYPE option;\nDEFINE FIELD IF NOT EXISTS admin ON user TYPE bool;\nDEFINE FIELD IF NOT EXISTS timezone ON user TYPE string;\n\n# Indexes based on query patterns (find_by_email, find_by_api_key, unique constraint from setup_auth)\nDEFINE INDEX IF NOT EXISTS user_email_idx ON user FIELDS email UNIQUE;\nDEFINE INDEX IF NOT EXISTS user_api_key_idx ON user FIELDS api_key;\n","events":""} \ No newline at end of file +{"schemas": "# Defines the schema for the 'analytics' table.\n\nDEFINE TABLE IF NOT EXISTS analytics SCHEMALESS;\n\n# Custom fields from the Analytics struct\nDEFINE FIELD IF NOT EXISTS page_loads ON analytics TYPE number;\nDEFINE FIELD IF NOT EXISTS visitors ON analytics TYPE number;\n\n# Defines authentication scope and access rules.\n# This mirrors the logic previously in SurrealDbClient::setup_auth\n\nDEFINE ACCESS IF NOT EXISTS account ON DATABASE TYPE RECORD\n SIGNUP ( CREATE user SET email = $email, password = crypto::argon2::generate($password), anonymous = false, user_id = $user_id) # Ensure user_id is provided if needed\n SIGNIN ( SELECT * FROM user WHERE email = $email AND crypto::argon2::compare(password, $password) );\n\n# Defines the schema for the 'conversation' table.\n\nDEFINE TABLE IF NOT EXISTS conversation SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON conversation TYPE string;\nDEFINE FIELD IF NOT EXISTS updated_at ON conversation TYPE string;\n\n# Custom fields from the Conversation struct\nDEFINE FIELD IF NOT EXISTS user_id ON conversation TYPE string;\nDEFINE FIELD IF NOT EXISTS title ON conversation TYPE string;\n\n# Add indexes based on query patterns (get_complete_conversation ownership check, get_user_conversations)\nDEFINE INDEX IF NOT EXISTS conversation_user_id_idx ON conversation FIELDS user_id;\nDEFINE INDEX IF NOT EXISTS conversation_created_at_idx ON conversation FIELDS created_at; # For get_user_conversations ORDER BY\n\n# Defines the schema for the 'file' table (used by FileInfo).\n\nDEFINE TABLE IF NOT EXISTS file SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON file TYPE string;\nDEFINE FIELD IF NOT EXISTS updated_at ON file TYPE string;\n\n# Custom fields from the FileInfo struct\nDEFINE FIELD IF NOT EXISTS sha256 ON file TYPE string;\nDEFINE FIELD IF NOT EXISTS path ON file TYPE string;\nDEFINE FIELD IF NOT EXISTS file_name ON file TYPE string;\nDEFINE FIELD IF NOT EXISTS mime_type ON file TYPE string;\nDEFINE FIELD IF NOT EXISTS user_id ON file TYPE string;\n\n# Indexes based on usage (get_by_sha, potentially user lookups)\n# Using UNIQUE based on the logic in FileInfo::new to prevent duplicates\nDEFINE INDEX IF NOT EXISTS file_sha256_idx ON file FIELDS sha256 UNIQUE;\nDEFINE INDEX IF NOT EXISTS file_user_id_idx ON file FIELDS user_id;\n\n# Defines the schema for the 'ingestion_task' table (used by IngestionTask).\n\nDEFINE TABLE IF NOT EXISTS ingestion_task SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE string;\nDEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE string;\n\n# Custom fields from the IngestionTask struct\n# IngestionPayload is complex, store as object\nDEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object;\n# IngestionTaskStatus can hold data (InProgress), store as object\nDEFINE FIELD IF NOT EXISTS status ON ingestion_task TYPE object;\nDEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string;\n\n# Indexes explicitly defined in build_indexes and useful for get_unfinished_tasks\nDEFINE INDEX IF NOT EXISTS idx_ingestion_task_status ON ingestion_task FIELDS status;\nDEFINE INDEX IF NOT EXISTS idx_ingestion_task_user ON ingestion_task FIELDS user_id;\nDEFINE INDEX IF NOT EXISTS idx_ingestion_task_created ON ingestion_task FIELDS created_at;\n\n# Defines the schema for the 'knowledge_entity' table.\n\nDEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity TYPE string;\nDEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity TYPE string;\n\n# Custom fields from the KnowledgeEntity struct\nDEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity TYPE string;\nDEFINE FIELD IF NOT EXISTS name ON knowledge_entity TYPE string;\nDEFINE FIELD IF NOT EXISTS description ON knowledge_entity TYPE string;\n# KnowledgeEntityType is an enum, store as string\nDEFINE FIELD IF NOT EXISTS entity_type ON knowledge_entity TYPE string;\n# metadata is Option, store as object\nDEFINE FIELD IF NOT EXISTS metadata ON knowledge_entity TYPE option;\n\n# Define embedding as a standard array of floats for schema definition\nDEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity TYPE array;\n# The specific vector nature is handled by the index definition below\n\nDEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity TYPE string;\n\n# Indexes based on build_indexes and query patterns\n# The INDEX definition correctly specifies the vector properties\nDEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\nDEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;\nDEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;\nDEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;\nDEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at; # For get_latest_knowledge_entities\n\n# Defines the schema for the 'message' table.\n\nDEFINE TABLE IF NOT EXISTS message SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON message TYPE string;\nDEFINE FIELD IF NOT EXISTS updated_at ON message TYPE string;\n\n# Custom fields from the Message struct\nDEFINE FIELD IF NOT EXISTS conversation_id ON message TYPE string;\n# MessageRole is an enum, store as string\nDEFINE FIELD IF NOT EXISTS role ON message TYPE string;\nDEFINE FIELD IF NOT EXISTS content ON message TYPE string;\n# references is Option>, store as array\nDEFINE FIELD IF NOT EXISTS references ON message TYPE option>;\n\n# Indexes based on query patterns (get_complete_conversation)\nDEFINE INDEX IF NOT EXISTS message_conversation_id_idx ON message FIELDS conversation_id;\nDEFINE INDEX IF NOT EXISTS message_updated_at_idx ON message FIELDS updated_at; # For ORDER BY\n\n# Defines the 'relates_to' edge table for KnowledgeRelationships.\n# Edges connect nodes, in this case knowledge_entity records.\n\n# Define the edge table itself, enforcing connections between knowledge_entity records\n# SCHEMAFULL requires all fields to be defined, maybe start with SCHEMALESS if metadata might vary\nDEFINE TABLE IF NOT EXISTS relates_to SCHEMALESS TYPE RELATION FROM knowledge_entity TO knowledge_entity;\n\n# Define the metadata field within the edge\n# RelationshipMetadata is a struct, store as object\nDEFINE FIELD IF NOT EXISTS metadata ON relates_to TYPE object;\n\n# Optionally, define fields within the metadata object for stricter schema (requires SCHEMAFULL on table)\n# DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;\n# DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;\n# DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;\n\n# Add indexes based on query patterns (delete_relationships_by_source_id, get_knowledge_relationships)\nDEFINE INDEX IF NOT EXISTS relates_to_metadata_source_id_idx ON relates_to FIELDS metadata.source_id;\nDEFINE INDEX IF NOT EXISTS relates_to_metadata_user_id_idx ON relates_to FIELDS metadata.user_id;\n\nDEFINE TABLE OVERWRITE script_migration SCHEMAFULL\n PERMISSIONS\n FOR select FULL\n FOR create, update, delete NONE;\n\nDEFINE FIELD OVERWRITE script_name ON script_migration TYPE string;\nDEFINE FIELD OVERWRITE executed_at ON script_migration TYPE datetime VALUE time::now() READONLY;\n\n# Defines the schema for the 'system_settings' table.\n\nDEFINE TABLE IF NOT EXISTS system_settings SCHEMALESS;\n\n# Custom fields from the SystemSettings struct\nDEFINE FIELD IF NOT EXISTS registrations_enabled ON system_settings TYPE bool;\nDEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;\nDEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;\nDEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;\nDEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\nDEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n\n# Defines the schema for the 'text_chunk' table.\n\nDEFINE TABLE IF NOT EXISTS text_chunk SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON text_chunk TYPE string;\nDEFINE FIELD IF NOT EXISTS updated_at ON text_chunk TYPE string;\n\n# Custom fields from the TextChunk struct\nDEFINE FIELD IF NOT EXISTS source_id ON text_chunk TYPE string;\nDEFINE FIELD IF NOT EXISTS chunk ON text_chunk TYPE string;\n\n# Define embedding as a standard array of floats for schema definition\nDEFINE FIELD IF NOT EXISTS embedding ON text_chunk TYPE array;\n# The specific vector nature is handled by the index definition below\n\nDEFINE FIELD IF NOT EXISTS user_id ON text_chunk TYPE string;\n\n# Indexes based on build_indexes and query patterns (delete_by_source_id)\n# The INDEX definition correctly specifies the vector properties\nDEFINE INDEX IF NOT EXISTS idx_embedding_chunks ON text_chunk FIELDS embedding HNSW DIMENSION 1536;\nDEFINE INDEX IF NOT EXISTS text_chunk_source_id_idx ON text_chunk FIELDS source_id;\nDEFINE INDEX IF NOT EXISTS text_chunk_user_id_idx ON text_chunk FIELDS user_id;\n\n# Defines the schema for the 'text_content' table.\n\nDEFINE TABLE IF NOT EXISTS text_content SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE string;\nDEFINE FIELD IF NOT EXISTS updated_at ON text_content TYPE string;\n\n# Custom fields from the TextContent struct\nDEFINE FIELD IF NOT EXISTS text ON text_content TYPE string;\n# FileInfo is a struct, store as object\nDEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option;\n# UrlInfo is a struct, store as object\nDEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option;\nDEFINE FIELD IF NOT EXISTS context ON text_content TYPE option;\nDEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;\nDEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;\n\n# Indexes based on query patterns\nDEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;\nDEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;\nDEFINE INDEX IF NOT EXISTS text_content_category_idx ON text_content FIELDS category;\n\n# Defines the schema for the 'user' table.\n# NOTE: Authentication scope and access rules are defined in auth.surql\n\nDEFINE TABLE IF NOT EXISTS user SCHEMALESS;\n\n# Standard fields\nDEFINE FIELD IF NOT EXISTS created_at ON user TYPE string;\nDEFINE FIELD IF NOT EXISTS updated_at ON user TYPE string;\n\n# Custom fields from the User struct\nDEFINE FIELD IF NOT EXISTS email ON user TYPE string;\nDEFINE FIELD IF NOT EXISTS password ON user TYPE string; # Stores the hashed password\nDEFINE FIELD IF NOT EXISTS anonymous ON user TYPE bool;\nDEFINE FIELD IF NOT EXISTS api_key ON user TYPE option;\nDEFINE FIELD IF NOT EXISTS admin ON user TYPE bool;\nDEFINE FIELD IF NOT EXISTS timezone ON user TYPE string;\n\n# Indexes based on query patterns (find_by_email, find_by_api_key, unique constraint from setup_auth)\nDEFINE INDEX IF NOT EXISTS user_email_idx ON user FIELDS email UNIQUE;\nDEFINE INDEX IF NOT EXISTS user_api_key_idx ON user FIELDS api_key;\n", "events": ""} \ No newline at end of file diff --git a/common/src/storage/types/text_content.rs b/common/src/storage/types/text_content.rs index fe11014..02f42a8 100644 --- a/common/src/storage/types/text_content.rs +++ b/common/src/storage/types/text_content.rs @@ -51,8 +51,11 @@ pub struct TextContentSearchResult { #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub struct UrlInfo { + #[serde(default)] pub url: String, + #[serde(default)] pub title: String, + #[serde(default)] pub image_id: String, }