mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-30 03:10:45 +02:00
chore: harden common storage bootstrap and slim embedded db assets
Unify embedding config, build providers from system settings, and fail startup when index builds error or time out. Move Surreal assets under common/db so embeds exclude crate source, and read storage via streams.
This commit is contained in:
@@ -0,0 +1,24 @@
|
||||
-- Seed the singleton analytics counters on first run.
-- When 'analytics:current' already exists the record is left untouched.
IF !(SELECT * FROM analytics:current) {
    CREATE analytics:current SET
        page_loads = 0,
        visitors = 0;
};
|
||||
|
||||
-- Seed the singleton 'system_settings:current' record with the default
-- models and prompts on first run. An existing record is left untouched.
IF !(SELECT * FROM system_settings:current) {
    CREATE system_settings:current CONTENT {
        registrations_enabled: true,
        require_email_verification: false,
        query_model: "gpt-4o-mini",
        processing_model: "gpt-4o-mini",
        embedding_model: "text-embedding-3-small",
        voice_processing_model: "whisper-1",
        image_processing_model: "gpt-4o-mini",
        image_processing_prompt: "Analyze this image and respond based on its primary content:\n - If the image is mainly text (document, screenshot, sign), transcribe the text verbatim.\n - If the image is mainly visual (photograph, art, landscape), provide a concise description of the scene.\n - For hybrid images (diagrams, ads), briefly describe the visual, then transcribe the text under a Text: heading.\n\n Respond directly with the analysis.",
        embedding_dimensions: 1536,
        query_system_prompt: "You are a knowledgeable assistant with access to a specialized knowledge base. You will be provided with relevant knowledge entities from the database as context. Each knowledge entity contains a name, description, and type, representing different concepts, ideas, and information.\nYour task is to:\n1. Carefully analyze the provided knowledge entities in the context\n2. Answer user questions based on this information\n3. Provide clear, concise, and accurate responses\n4. When referencing information, briefly mention which knowledge entity it came from\n5. If the provided context doesn't contain enough information to answer the question confidently, clearly state this\n6. If only partial information is available, explain what you can answer and what information is missing\n7. Avoid making assumptions or providing information not supported by the context\n8. Output the references to the documents. Use the UUIDs and make sure they are correct!\nRemember:\n- Be direct and honest about the limitations of your knowledge\n- Cite the relevant knowledge entities when providing information, but only provide the UUIDs in the reference array\n- If you need to combine information from multiple entities, explain how they connect\n- Don't speculate beyond what's provided in the context\nExample response formats:\n\"Based on [Entity Name], [answer...]\"\n\"I found relevant information in multiple entries: [explanation...]\"\n\"I apologize, but the provided context doesn't contain information about [topic]\"",
        ingestion_system_prompt: "You are an AI assistant. You will receive a text content, along with user context and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities from the database, do not replicate these! Your task is to create meaningful knowledge entities from the submitted content. Try and infer as much as possible from the users context and category when creating these. If the user submits a large content, create more general entities. If the user submits a narrow and precise content, try and create precise knowledge entities.\nThe JSON should have the following structure:\n{\n\"knowledge_entities\": [\n{\n\"key\": \"unique-key-1\",\n\"name\": \"Entity Name\",\n\"description\": \"A detailed description of the entity.\",\n\"entity_type\": \"TypeOfEntity\"\n},\n// More entities...\n],\n\"relationships\": [\n{\n\"type\": \"RelationshipType\",\n\"source\": \"unique-key-1 or UUID from existing database\",\n\"target\": \"unique-key-1 or UUID from existing database\"\n},\n// More relationships...\n]\n}\nGuidelines:\n1. Do NOT generate any IDs or UUIDs. Use a unique `key` for each knowledge entity.\n2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`.\n3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.\n4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.\n5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity.\n6. You will be presented with a few existing KnowledgeEntities that are similar to the current ones. They will have an existing UUID. When creating relationships to these entities, use their UUID.\n7. Only create relationships between existing KnowledgeEntities.\n8. 
Entities that exist already in the database should NOT be created again. If there is only a minor overlap, skip creating a new entity.\n9. A new relationship MUST include a newly created KnowledgeEntity."
    };
};
|
||||
@@ -0,0 +1,8 @@
|
||||
-- Rename text_content.instructions to text_content.context.
-- Statement order matters: declare both fields, copy the data, then drop the old field.
DEFINE FIELD IF NOT EXISTS context ON TABLE text_content TYPE option<string>;
DEFINE FIELD OVERWRITE instructions ON TABLE text_content TYPE option<string>;

-- Carry every existing instructions value over to context.
UPDATE text_content
    SET context = instructions
    WHERE instructions != NONE;

-- Intentionally unfiltered: clears the old value on every record.
UPDATE text_content UNSET instructions;

REMOVE FIELD instructions ON TABLE text_content;
|
||||
@@ -0,0 +1,2 @@
|
||||
-- Runtime-managed: text_content FTS indexes now created at startup via the shared Surreal helper.
|
||||
-- This migration is intentionally left as a no-op to avoid heavy index builds during migration.
|
||||
+7
@@ -0,0 +1,7 @@
|
||||
-- Declare the embedding configuration fields on system_settings.
DEFINE FIELD IF NOT EXISTS embedding_model ON TABLE system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS embedding_dimensions ON TABLE system_settings TYPE int;

-- Backfill defaults on the singleton record, but only when both values are unset.
UPDATE system_settings:current
    SET embedding_model = "text-embedding-3-small",
        embedding_dimensions = 1536
    WHERE embedding_model == NONE AND embedding_dimensions == NONE;
|
||||
@@ -0,0 +1,7 @@
|
||||
-- Declare the image-processing configuration fields on system_settings.
DEFINE FIELD IF NOT EXISTS image_processing_model ON TABLE system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS image_processing_prompt ON TABLE system_settings TYPE string;

-- Backfill defaults on the singleton record, but only when both values are unset.
UPDATE system_settings:current
    SET image_processing_model = "gpt-4o-mini",
        image_processing_prompt = "Analyze this image and respond based on its primary content:\n - If the image is mainly text (document, screenshot, sign), transcribe the text verbatim.\n - If the image is mainly visual (photograph, art, landscape), provide a concise description of the scene.\n - For hybrid images (diagrams, ads), briefly describe the visual, then transcribe the text under a Text: heading.\n\n Respond directly with the analysis."
    WHERE image_processing_model == NONE AND image_processing_prompt == NONE;
|
||||
@@ -0,0 +1 @@
|
||||
-- No-op: legacy `job` table was superseded by `ingestion_task`; kept for migration order compatibility.
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
-- Declare the voice-processing model field on system_settings.
DEFINE FIELD IF NOT EXISTS voice_processing_model ON TABLE system_settings TYPE string;

-- Backfill the default on the singleton record when no model is set yet.
UPDATE system_settings:current
    SET voice_processing_model = "whisper-1"
    WHERE voice_processing_model == NONE;
|
||||
@@ -0,0 +1,115 @@
|
||||
-- Align timestamp fields with SurrealDB native datetime type.
|
||||
|
||||
-- User timestamps: relax the fields, convert stored strings, then pin to datetime.
DEFINE FIELD OVERWRITE created_at ON TABLE user FLEXIBLE;
DEFINE FIELD OVERWRITE updated_at ON TABLE user FLEXIBLE;

-- Only non-empty string values are converted; every other record is skipped.
UPDATE user
    SET created_at = type::datetime(created_at)
    WHERE type::is::string(created_at) && created_at != "";

UPDATE user
    SET updated_at = type::datetime(updated_at)
    WHERE type::is::string(updated_at) && updated_at != "";

DEFINE FIELD OVERWRITE created_at ON TABLE user TYPE datetime;
DEFINE FIELD OVERWRITE updated_at ON TABLE user TYPE datetime;
|
||||
|
||||
-- Text content timestamps: relax the fields, convert stored strings, then pin to datetime.
DEFINE FIELD OVERWRITE created_at ON text_content FLEXIBLE;
DEFINE FIELD OVERWRITE updated_at ON text_content FLEXIBLE;

-- Only non-empty string values are converted; every other record is skipped.
UPDATE text_content SET created_at = type::datetime(created_at)
    WHERE type::is::string(created_at) AND created_at != "";

UPDATE text_content SET updated_at = type::datetime(updated_at)
    WHERE type::is::string(updated_at) AND updated_at != "";

DEFINE FIELD OVERWRITE created_at ON text_content TYPE datetime;
DEFINE FIELD OVERWRITE updated_at ON text_content TYPE datetime;

-- Rebuild the created_at index after the value type change.
-- IF EXISTS keeps the migration from aborting when the index is absent.
REBUILD INDEX IF EXISTS text_content_created_at_idx ON text_content;
|
||||
|
||||
-- Text chunk timestamps: relax the fields, convert stored strings, then pin to datetime.
DEFINE FIELD OVERWRITE created_at ON TABLE text_chunk FLEXIBLE;
DEFINE FIELD OVERWRITE updated_at ON TABLE text_chunk FLEXIBLE;

-- Only non-empty string values are converted; every other record is skipped.
UPDATE text_chunk
    SET created_at = type::datetime(created_at)
    WHERE type::is::string(created_at) && created_at != "";

UPDATE text_chunk
    SET updated_at = type::datetime(updated_at)
    WHERE type::is::string(updated_at) && updated_at != "";

DEFINE FIELD OVERWRITE created_at ON TABLE text_chunk TYPE datetime;
DEFINE FIELD OVERWRITE updated_at ON TABLE text_chunk TYPE datetime;
|
||||
|
||||
-- Knowledge entity timestamps: relax the fields, convert stored strings, then pin to datetime.
DEFINE FIELD OVERWRITE created_at ON knowledge_entity FLEXIBLE;
DEFINE FIELD OVERWRITE updated_at ON knowledge_entity FLEXIBLE;

-- Only non-empty string values are converted; every other record is skipped.
UPDATE knowledge_entity SET created_at = type::datetime(created_at)
    WHERE type::is::string(created_at) AND created_at != "";

UPDATE knowledge_entity SET updated_at = type::datetime(updated_at)
    WHERE type::is::string(updated_at) AND updated_at != "";

DEFINE FIELD OVERWRITE created_at ON knowledge_entity TYPE datetime;
DEFINE FIELD OVERWRITE updated_at ON knowledge_entity TYPE datetime;

-- Rebuild the created_at index after the value type change.
-- IF EXISTS keeps the migration from aborting when the index is absent.
REBUILD INDEX IF EXISTS knowledge_entity_created_at_idx ON knowledge_entity;
|
||||
|
||||
-- Conversation timestamps: relax the fields, convert stored strings, then pin to datetime.
DEFINE FIELD OVERWRITE created_at ON conversation FLEXIBLE;
DEFINE FIELD OVERWRITE updated_at ON conversation FLEXIBLE;

-- Only non-empty string values are converted; every other record is skipped.
UPDATE conversation SET created_at = type::datetime(created_at)
    WHERE type::is::string(created_at) AND created_at != "";

UPDATE conversation SET updated_at = type::datetime(updated_at)
    WHERE type::is::string(updated_at) AND updated_at != "";

DEFINE FIELD OVERWRITE created_at ON conversation TYPE datetime;
DEFINE FIELD OVERWRITE updated_at ON conversation TYPE datetime;

-- Rebuild the created_at index after the value type change.
-- IF EXISTS keeps the migration from aborting when the index is absent.
REBUILD INDEX IF EXISTS conversation_created_at_idx ON conversation;
|
||||
|
||||
-- Message timestamps: relax the fields, convert stored strings, then pin to datetime.
DEFINE FIELD OVERWRITE created_at ON message FLEXIBLE;
DEFINE FIELD OVERWRITE updated_at ON message FLEXIBLE;

-- Only non-empty string values are converted; every other record is skipped.
UPDATE message SET created_at = type::datetime(created_at)
    WHERE type::is::string(created_at) AND created_at != "";

UPDATE message SET updated_at = type::datetime(updated_at)
    WHERE type::is::string(updated_at) AND updated_at != "";

DEFINE FIELD OVERWRITE created_at ON message TYPE datetime;
DEFINE FIELD OVERWRITE updated_at ON message TYPE datetime;

-- Rebuild the updated_at index after the value type change.
-- IF EXISTS keeps the migration from aborting when the index is absent.
REBUILD INDEX IF EXISTS message_updated_at_idx ON message;
|
||||
|
||||
-- Ingestion task timestamps: relax the fields, convert stored strings, then pin to datetime.
DEFINE FIELD OVERWRITE created_at ON ingestion_task FLEXIBLE;
DEFINE FIELD OVERWRITE updated_at ON ingestion_task FLEXIBLE;

-- Only non-empty string values are converted; every other record is skipped.
UPDATE ingestion_task SET created_at = type::datetime(created_at)
    WHERE type::is::string(created_at) AND created_at != "";

UPDATE ingestion_task SET updated_at = type::datetime(updated_at)
    WHERE type::is::string(updated_at) AND updated_at != "";

DEFINE FIELD OVERWRITE created_at ON ingestion_task TYPE datetime;
DEFINE FIELD OVERWRITE updated_at ON ingestion_task TYPE datetime;

-- Rebuild the creation-time index after the value type change.
-- IF EXISTS keeps the migration from aborting when the index is absent.
REBUILD INDEX IF EXISTS idx_ingestion_task_created ON ingestion_task;
|
||||
|
||||
-- File timestamps: relax the fields, convert stored strings, then pin to datetime.
DEFINE FIELD OVERWRITE created_at ON TABLE file FLEXIBLE;
DEFINE FIELD OVERWRITE updated_at ON TABLE file FLEXIBLE;

-- Only non-empty string values are converted; every other record is skipped.
UPDATE file
    SET created_at = type::datetime(created_at)
    WHERE type::is::string(created_at) && created_at != "";

UPDATE file
    SET updated_at = type::datetime(updated_at)
    WHERE type::is::string(updated_at) && updated_at != "";

DEFINE FIELD OVERWRITE created_at ON TABLE file TYPE datetime;
DEFINE FIELD OVERWRITE updated_at ON TABLE file TYPE datetime;
|
||||
@@ -0,0 +1 @@
|
||||
-- Runtime-managed: FTS indexes now built at startup; migration retained as a no-op.
|
||||
@@ -0,0 +1,173 @@
|
||||
-- State machine migration for ingestion_task records.
-- Declares the new state-machine fields as optional so existing records stay
-- valid until they are backfilled.
DEFINE FIELD IF NOT EXISTS state ON TABLE ingestion_task TYPE option<string>;
DEFINE FIELD IF NOT EXISTS attempts ON TABLE ingestion_task TYPE option<number>;
DEFINE FIELD IF NOT EXISTS max_attempts ON TABLE ingestion_task TYPE option<number>;
DEFINE FIELD IF NOT EXISTS scheduled_at ON TABLE ingestion_task TYPE option<datetime>;
DEFINE FIELD IF NOT EXISTS locked_at ON TABLE ingestion_task TYPE option<datetime>;
DEFINE FIELD IF NOT EXISTS lease_duration_secs ON TABLE ingestion_task TYPE option<number>;
DEFINE FIELD IF NOT EXISTS worker_id ON TABLE ingestion_task TYPE option<string>;
DEFINE FIELD IF NOT EXISTS error_code ON TABLE ingestion_task TYPE option<string>;
DEFINE FIELD IF NOT EXISTS error_message ON TABLE ingestion_task TYPE option<string>;
DEFINE FIELD IF NOT EXISTS last_error_at ON TABLE ingestion_task TYPE option<datetime>;
DEFINE FIELD IF NOT EXISTS priority ON TABLE ingestion_task TYPE option<number>;

-- Replace the legacy status definition with an optional object.
-- IF EXISTS keeps the migration from aborting when no status field was ever defined.
REMOVE FIELD IF EXISTS status ON TABLE ingestion_task;
DEFINE FIELD status ON TABLE ingestion_task TYPE option<object>;

DEFINE INDEX IF NOT EXISTS idx_ingestion_task_state_sched ON TABLE ingestion_task FIELDS state, scheduled_at;
|
||||
|
||||
-- Backfill the state-machine fields from the legacy `status` object.
-- The whole backfill is skipped unless the lookup for tasks with state = NONE
-- yields a count greater than zero.
LET $needs_migration = (SELECT count() AS count FROM ingestion_task WHERE state = NONE)[0].count;

IF $needs_migration > 0 {
    -- Created -> Pending
    UPDATE ingestion_task SET
        state = "Pending",
        attempts = 0,
        max_attempts = 3,
        scheduled_at = IF created_at != NONE THEN created_at ELSE time::now() END,
        locked_at = NONE,
        lease_duration_secs = 300,
        worker_id = NONE,
        error_code = NONE,
        error_message = NONE,
        last_error_at = NONE,
        priority = 0
    WHERE state = NONE AND status != NONE AND status.name = "Created";

    -- InProgress -> Processing (attempt count and lock time come from the old status)
    UPDATE ingestion_task SET
        state = "Processing",
        attempts = IF status.attempts != NONE THEN status.attempts ELSE 1 END,
        max_attempts = 3,
        scheduled_at = IF status.last_attempt != NONE THEN status.last_attempt ELSE time::now() END,
        locked_at = IF status.last_attempt != NONE THEN status.last_attempt ELSE time::now() END,
        lease_duration_secs = 300,
        worker_id = NONE,
        error_code = NONE,
        error_message = NONE,
        last_error_at = NONE,
        priority = 0
    WHERE state = NONE AND status != NONE AND status.name = "InProgress";

    -- Completed -> Succeeded
    UPDATE ingestion_task SET
        state = "Succeeded",
        attempts = 1,
        max_attempts = 3,
        scheduled_at = IF updated_at != NONE THEN updated_at ELSE time::now() END,
        locked_at = NONE,
        lease_duration_secs = 300,
        worker_id = NONE,
        error_code = NONE,
        error_message = NONE,
        last_error_at = NONE,
        priority = 0
    WHERE state = NONE AND status != NONE AND status.name = "Completed";

    -- Error -> DeadLetter (terminal failure); the old message is preserved
    UPDATE ingestion_task SET
        state = "DeadLetter",
        attempts = 3,
        max_attempts = 3,
        scheduled_at = IF updated_at != NONE THEN updated_at ELSE time::now() END,
        locked_at = NONE,
        lease_duration_secs = 300,
        worker_id = NONE,
        error_code = NONE,
        error_message = status.message,
        last_error_at = IF updated_at != NONE THEN updated_at ELSE time::now() END,
        priority = 0
    WHERE state = NONE AND status != NONE AND status.name = "Error";

    -- Cancelled -> Cancelled
    UPDATE ingestion_task SET
        state = "Cancelled",
        attempts = 0,
        max_attempts = 3,
        scheduled_at = IF updated_at != NONE THEN updated_at ELSE time::now() END,
        locked_at = NONE,
        lease_duration_secs = 300,
        worker_id = NONE,
        error_code = NONE,
        error_message = NONE,
        last_error_at = NONE,
        priority = 0
    WHERE state = NONE AND status != NONE AND status.name = "Cancelled";

    -- Fallback: anything still without a state becomes Pending
    UPDATE ingestion_task SET
        state = "Pending",
        attempts = 0,
        max_attempts = 3,
        scheduled_at = IF updated_at != NONE THEN updated_at ELSE time::now() END,
        locked_at = NONE,
        lease_duration_secs = 300,
        worker_id = NONE,
        error_code = NONE,
        error_message = NONE,
        last_error_at = NONE,
        priority = 0
    WHERE state = NONE;
};
|
||||
|
||||
-- Ensure defaults for the newly added fields on every ingestion_task record.
UPDATE ingestion_task SET max_attempts = 3 WHERE max_attempts = NONE;

UPDATE ingestion_task SET lease_duration_secs = 300 WHERE lease_duration_secs = NONE;

UPDATE ingestion_task SET attempts = 0 WHERE attempts = NONE;

UPDATE ingestion_task SET priority = 0 WHERE priority = NONE;

-- Prefer the record's updated_at; fall back to the current time.
UPDATE ingestion_task
    SET scheduled_at = IF updated_at != NONE THEN updated_at ELSE time::now() END
    WHERE scheduled_at = NONE;

UPDATE ingestion_task SET locked_at = NONE WHERE locked_at = NONE;

-- Turn empty worker ids into NONE.
UPDATE ingestion_task SET worker_id = NONE WHERE worker_id != NONE AND worker_id = "";

UPDATE ingestion_task SET error_code = NONE WHERE error_code = NONE;

UPDATE ingestion_task SET error_message = NONE WHERE error_message = NONE;

UPDATE ingestion_task SET last_error_at = NONE WHERE last_error_at = NONE;

-- Clear the legacy status value wherever one is still present.
UPDATE ingestion_task SET status = NONE WHERE status != NONE;
|
||||
@@ -0,0 +1,24 @@
|
||||
-- Scratchpad table: schema, typed fields and lookup indexes.
DEFINE TABLE IF NOT EXISTS scratchpad SCHEMALESS;

-- Standard fields from stored_object! macro
DEFINE FIELD IF NOT EXISTS created_at ON TABLE scratchpad TYPE datetime;
DEFINE FIELD IF NOT EXISTS updated_at ON TABLE scratchpad TYPE datetime;

-- Custom fields from the Scratchpad struct
DEFINE FIELD IF NOT EXISTS user_id ON TABLE scratchpad TYPE string;
DEFINE FIELD IF NOT EXISTS title ON TABLE scratchpad TYPE string;
DEFINE FIELD IF NOT EXISTS content ON TABLE scratchpad TYPE string;
DEFINE FIELD IF NOT EXISTS last_saved_at ON TABLE scratchpad TYPE datetime;
DEFINE FIELD IF NOT EXISTS is_dirty ON TABLE scratchpad TYPE bool DEFAULT false;
DEFINE FIELD IF NOT EXISTS is_archived ON TABLE scratchpad TYPE bool DEFAULT false;
DEFINE FIELD IF NOT EXISTS archived_at ON TABLE scratchpad TYPE option<datetime>;
DEFINE FIELD IF NOT EXISTS ingested_at ON TABLE scratchpad TYPE option<datetime>;

-- Indexes on user, user + archive flag, update time and archive time
DEFINE INDEX IF NOT EXISTS scratchpad_user_idx ON TABLE scratchpad FIELDS user_id;
DEFINE INDEX IF NOT EXISTS scratchpad_user_archived_idx ON TABLE scratchpad FIELDS user_id, is_archived;
DEFINE INDEX IF NOT EXISTS scratchpad_updated_idx ON TABLE scratchpad FIELDS updated_at;
DEFINE INDEX IF NOT EXISTS scratchpad_archived_idx ON TABLE scratchpad FIELDS archived_at;
|
||||
@@ -0,0 +1,18 @@
|
||||
-- Drop the HNSW indexes from the base tables (now created at runtime on *_embedding tables).
REMOVE INDEX IF EXISTS idx_embedding_entities ON TABLE knowledge_entity;
REMOVE INDEX IF EXISTS idx_embedding_chunks ON TABLE text_chunk;

-- Drop the FTS indexes (now created at runtime via indexes.rs).
REMOVE INDEX IF EXISTS text_content_fts_text_idx ON TABLE text_content;
REMOVE INDEX IF EXISTS text_content_fts_category_idx ON TABLE text_content;
REMOVE INDEX IF EXISTS text_content_fts_context_idx ON TABLE text_content;
REMOVE INDEX IF EXISTS text_content_fts_file_name_idx ON TABLE text_content;
REMOVE INDEX IF EXISTS text_content_fts_url_idx ON TABLE text_content;
REMOVE INDEX IF EXISTS text_content_fts_url_title_idx ON TABLE text_content;
REMOVE INDEX IF EXISTS knowledge_entity_fts_name_idx ON TABLE knowledge_entity;
REMOVE INDEX IF EXISTS knowledge_entity_fts_description_idx ON TABLE knowledge_entity;
REMOVE INDEX IF EXISTS text_chunk_fts_chunk_idx ON TABLE text_chunk;

-- Drop the legacy analyzers (recreated at runtime with updated configuration).
REMOVE ANALYZER IF EXISTS app_default_fts_analyzer;
REMOVE ANALYZER IF EXISTS app_en_fts_analyzer;
|
||||
@@ -0,0 +1,23 @@
|
||||
-- Dedicated tables for chunk and entity embeddings (split out for index efficiency).

-- text_chunk_embedding: one row per stored chunk embedding.
DEFINE TABLE IF NOT EXISTS text_chunk_embedding SCHEMAFULL;
DEFINE FIELD IF NOT EXISTS created_at ON TABLE text_chunk_embedding TYPE datetime;
DEFINE FIELD IF NOT EXISTS updated_at ON TABLE text_chunk_embedding TYPE datetime;
DEFINE FIELD IF NOT EXISTS user_id ON TABLE text_chunk_embedding TYPE string;
DEFINE FIELD IF NOT EXISTS source_id ON TABLE text_chunk_embedding TYPE string;
DEFINE FIELD IF NOT EXISTS chunk_id ON TABLE text_chunk_embedding TYPE record<text_chunk>;
DEFINE FIELD IF NOT EXISTS embedding ON TABLE text_chunk_embedding TYPE array<float>;
DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON TABLE text_chunk_embedding FIELDS chunk_id;
DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON TABLE text_chunk_embedding FIELDS user_id;
DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON TABLE text_chunk_embedding FIELDS source_id;

-- knowledge_entity_embedding: one row per stored entity embedding.
DEFINE TABLE IF NOT EXISTS knowledge_entity_embedding SCHEMAFULL;
DEFINE FIELD IF NOT EXISTS created_at ON TABLE knowledge_entity_embedding TYPE datetime;
DEFINE FIELD IF NOT EXISTS updated_at ON TABLE knowledge_entity_embedding TYPE datetime;
DEFINE FIELD IF NOT EXISTS user_id ON TABLE knowledge_entity_embedding TYPE string;
DEFINE FIELD IF NOT EXISTS entity_id ON TABLE knowledge_entity_embedding TYPE record<knowledge_entity>;
DEFINE FIELD IF NOT EXISTS embedding ON TABLE knowledge_entity_embedding TYPE array<float>;
DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON TABLE knowledge_entity_embedding FIELDS entity_id;
DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_user_id_idx ON TABLE knowledge_entity_embedding FIELDS user_id;
|
||||
@@ -0,0 +1,23 @@
|
||||
-- Copy embeddings from the base tables into the dedicated *_embedding tables.
-- Must run BEFORE the migration that removes the embedding fields.
-- Rows with a missing or empty embedding are skipped.

FOR $src IN (SELECT * FROM text_chunk WHERE embedding != NONE AND array::len(embedding) > 0) {
    CREATE text_chunk_embedding SET
        chunk_id = $src.id,
        embedding = $src.embedding,
        user_id = $src.user_id,
        source_id = $src.source_id,
        created_at = $src.created_at,
        updated_at = $src.updated_at;
};

FOR $src IN (SELECT * FROM knowledge_entity WHERE embedding != NONE AND array::len(embedding) > 0) {
    CREATE knowledge_entity_embedding SET
        entity_id = $src.id,
        embedding = $src.embedding,
        user_id = $src.user_id,
        created_at = $src.created_at,
        updated_at = $src.updated_at;
};
|
||||
@@ -0,0 +1,3 @@
|
||||
-- Embeddings now live in the *_embedding tables, so the base tables no longer
-- carry an embedding field.
REMOVE FIELD IF EXISTS embedding ON text_chunk;
REMOVE FIELD IF EXISTS embedding ON knowledge_entity;
|
||||
@@ -0,0 +1,8 @@
|
||||
-- Expose the active embedding backend on system_settings.
DEFINE FIELD IF NOT EXISTS embedding_backend ON TABLE system_settings TYPE option<string>;

-- Existing installs default to 'openai' to preserve backward compatibility;
-- a value that is already set is kept.
UPDATE system_settings:current
    SET embedding_backend = 'openai'
    WHERE embedding_backend == NONE;
|
||||
@@ -0,0 +1,97 @@
|
||||
-- Enforce SCHEMAFULL on all tables and define missing fields
|
||||
|
||||
-- 1. ingestion_task (formerly job): enforce SCHEMAFULL and pin the final field definitions.
DEFINE TABLE OVERWRITE ingestion_task SCHEMAFULL;

-- Core Fields
-- created_at / updated_at were already defined by the timestamp migration, so
-- IF NOT EXISTS would skip these statements and the DEFAULT would never apply.
DEFINE FIELD IF NOT EXISTS id ON ingestion_task TYPE record<ingestion_task>;
DEFINE FIELD OVERWRITE created_at ON ingestion_task TYPE datetime DEFAULT time::now();
DEFINE FIELD OVERWRITE updated_at ON ingestion_task TYPE datetime DEFAULT time::now();
DEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string;

-- State Machine Fields
-- The state-machine migration defined these as option<string> / option<number> /
-- option<datetime>. OVERWRITE is required for the stricter type, DEFAULT and
-- ASSERT below to take effect; IF NOT EXISTS would silently keep the old definition.
-- NOTE(review): assumes every existing record was backfilled by that migration — confirm.
DEFINE FIELD OVERWRITE state ON ingestion_task TYPE string ASSERT $value IN ['Pending', 'Reserved', 'Processing', 'Succeeded', 'Failed', 'Cancelled', 'DeadLetter'];
DEFINE FIELD OVERWRITE attempts ON ingestion_task TYPE int DEFAULT 0;
DEFINE FIELD OVERWRITE max_attempts ON ingestion_task TYPE int DEFAULT 3;
DEFINE FIELD OVERWRITE scheduled_at ON ingestion_task TYPE datetime DEFAULT time::now();
DEFINE FIELD OVERWRITE lease_duration_secs ON ingestion_task TYPE int DEFAULT 300;
DEFINE FIELD OVERWRITE priority ON ingestion_task TYPE int DEFAULT 0;

-- Optional fields keep the definition they already have.
DEFINE FIELD IF NOT EXISTS locked_at ON ingestion_task TYPE option<datetime>;
DEFINE FIELD IF NOT EXISTS worker_id ON ingestion_task TYPE option<string>;
DEFINE FIELD IF NOT EXISTS error_code ON ingestion_task TYPE option<string>;
DEFINE FIELD IF NOT EXISTS error_message ON ingestion_task TYPE option<string>;
DEFINE FIELD IF NOT EXISTS last_error_at ON ingestion_task TYPE option<datetime>;
|
||||
|
||||
-- Content payload (IngestionPayload enum): one optional object per variant.
DEFINE FIELD IF NOT EXISTS content ON TABLE ingestion_task TYPE object;
DEFINE FIELD IF NOT EXISTS content.Url ON TABLE ingestion_task TYPE option<object>;
DEFINE FIELD IF NOT EXISTS content.Text ON TABLE ingestion_task TYPE option<object>;
DEFINE FIELD IF NOT EXISTS content.File ON TABLE ingestion_task TYPE option<object>;

-- Url variant
DEFINE FIELD IF NOT EXISTS content.Url.url ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Url.context ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Url.category ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Url.user_id ON TABLE ingestion_task TYPE string;

-- Text variant
DEFINE FIELD IF NOT EXISTS content.Text.text ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Text.context ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Text.category ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Text.user_id ON TABLE ingestion_task TYPE string;

-- File variant
DEFINE FIELD IF NOT EXISTS content.File.context ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.category ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.user_id ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info ON TABLE ingestion_task TYPE object;

-- File variant: nested file_info (FileInfo struct)
DEFINE FIELD IF NOT EXISTS content.File.file_info.id ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info.created_at ON TABLE ingestion_task TYPE datetime;
DEFINE FIELD IF NOT EXISTS content.File.file_info.updated_at ON TABLE ingestion_task TYPE datetime;
DEFINE FIELD IF NOT EXISTS content.File.file_info.sha256 ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info.path ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info.file_name ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info.mime_type ON TABLE ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info.user_id ON TABLE ingestion_task TYPE string;
|
||||
|
||||
-- 2. Switch the remaining tables to SCHEMAFULL.
DEFINE TABLE OVERWRITE analytics SCHEMAFULL;
DEFINE TABLE OVERWRITE conversation SCHEMAFULL;
DEFINE TABLE OVERWRITE file SCHEMAFULL;
DEFINE TABLE OVERWRITE knowledge_entity SCHEMAFULL;
DEFINE TABLE OVERWRITE message SCHEMAFULL;

-- relates_to is a relation table between knowledge entities; its metadata
-- object carries the user, source and relationship type.
DEFINE TABLE OVERWRITE relates_to SCHEMAFULL TYPE RELATION;
DEFINE FIELD IF NOT EXISTS in ON TABLE relates_to TYPE record<knowledge_entity>;
DEFINE FIELD IF NOT EXISTS out ON TABLE relates_to TYPE record<knowledge_entity>;
DEFINE FIELD IF NOT EXISTS metadata ON TABLE relates_to TYPE object;
DEFINE FIELD IF NOT EXISTS metadata.user_id ON TABLE relates_to TYPE string;
DEFINE FIELD IF NOT EXISTS metadata.source_id ON TABLE relates_to TYPE string;
DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON TABLE relates_to TYPE string;

DEFINE TABLE OVERWRITE scratchpad SCHEMAFULL;
DEFINE TABLE OVERWRITE system_settings SCHEMAFULL;
DEFINE TABLE OVERWRITE text_chunk SCHEMAFULL;
|
||||
-- text_content: switch to SCHEMAFULL and declare every field it stores,
-- including the nested url_info and file_info objects.
DEFINE TABLE OVERWRITE text_content SCHEMAFULL;
DEFINE FIELD IF NOT EXISTS created_at ON TABLE text_content TYPE datetime;
DEFINE FIELD IF NOT EXISTS updated_at ON TABLE text_content TYPE datetime;
DEFINE FIELD IF NOT EXISTS text ON TABLE text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info ON TABLE text_content TYPE option<object>;
DEFINE FIELD IF NOT EXISTS url_info ON TABLE text_content TYPE option<object>;
DEFINE FIELD IF NOT EXISTS url_info.url ON TABLE text_content TYPE string;
DEFINE FIELD IF NOT EXISTS url_info.title ON TABLE text_content TYPE string;
DEFINE FIELD IF NOT EXISTS url_info.image_id ON TABLE text_content TYPE string;
DEFINE FIELD IF NOT EXISTS context ON TABLE text_content TYPE option<string>;
DEFINE FIELD IF NOT EXISTS category ON TABLE text_content TYPE string;
DEFINE FIELD IF NOT EXISTS user_id ON TABLE text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.id ON TABLE text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.created_at ON TABLE text_content TYPE datetime;
DEFINE FIELD IF NOT EXISTS file_info.updated_at ON TABLE text_content TYPE datetime;
DEFINE FIELD IF NOT EXISTS file_info.sha256 ON TABLE text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.path ON TABLE text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.file_name ON TABLE text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.mime_type ON TABLE text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.user_id ON TABLE text_content TYPE string;

-- user: switch to SCHEMAFULL.
DEFINE TABLE OVERWRITE user SCHEMAFULL;
|
||||
@@ -0,0 +1 @@
|
||||
-- Adds a string field `theme` to the `user` table, declared with DEFAULT "system".
-- IF NOT EXISTS keeps the statement a no-op when the field is already defined.
DEFINE FIELD IF NOT EXISTS theme ON user TYPE string DEFAULT "system";
|
||||
@@ -0,0 +1,3 @@
|
||||
-- Per-user deduplication: same SHA256 may exist for different users.
|
||||
-- Drop the old index on sha256 alone, if it is still defined.
REMOVE INDEX IF EXISTS file_sha256_idx ON file;
|
||||
-- Uniqueness is enforced on the (user_id, sha256) pair instead.
DEFINE INDEX IF NOT EXISTS file_user_sha256_idx ON file FIELDS user_id, sha256 UNIQUE;
|
||||
@@ -0,0 +1,33 @@
|
||||
-- Harden knowledge entity embeddings and graph storage invariants.
|
||||
|
||||
-- Declares a string field `source_id` on knowledge_entity_embedding.
-- Existing rows are populated by the backfill loop that follows this statement.
DEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity_embedding TYPE string;
|
||||
|
||||
-- Backfill denormalized source_id from the linked entity.
-- Only id and entity_id are selected: the loop never reads the embedding
-- vector, so there is no need to load it for every row.
FOR $emb IN (SELECT id, entity_id FROM knowledge_entity_embedding WHERE source_id = NONE OR source_id = '') {
    -- [0] yields NONE when the linked entity record does not exist.
    LET $entity = (SELECT source_id FROM $emb.entity_id)[0];
    IF $entity != NONE {
        UPDATE $emb.id SET source_id = $entity.source_id;
    } ELSE {
        -- Orphaned embedding: its entity is gone, so there is no source_id to
        -- copy. Leaving the row would keep source_id = NONE, which the
        -- `TYPE string` field rejects as soon as the re-key step rewrites the
        -- row, aborting the whole migration. Remove it instead.
        DELETE $emb.id;
    }
};
|
||||
|
||||
-- Give each embedding the same key as its entity so the records map 1:1.
FOR $row IN (SELECT * FROM knowledge_entity_embedding) {
    -- Target id: knowledge_entity_embedding:<key of the linked entity>.
    LET $target = type::thing('knowledge_entity_embedding', record::id($row.entity_id));
    IF $target != $row.id {
        -- Write the row under the target id, then drop the old record.
        UPSERT $target CONTENT {
            entity_id: $row.entity_id,
            embedding: $row.embedding,
            user_id: $row.user_id,
            source_id: $row.source_id,
            created_at: $row.created_at,
            updated_at: $row.updated_at
        };
        DELETE $row.id;
    }
};
|
||||
|
||||
-- Recreate the entity_id index as UNIQUE. It is removed first because
-- DEFINE INDEX IF NOT EXISTS leaves an already-defined index untouched.
REMOVE INDEX IF EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding;
|
||||
-- Runs after the re-key loop, which leaves one embedding record per entity key.
DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id UNIQUE;
|
||||
-- Lookup index on the backfilled source_id field.
DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_source_id_idx ON knowledge_entity_embedding FIELDS source_id;
|
||||
-- Compound (user_id, source_id) index on the entity table itself.
DEFINE INDEX IF NOT EXISTS knowledge_entity_user_source_idx ON knowledge_entity FIELDS user_id, source_id;
|
||||
@@ -0,0 +1,21 @@
|
||||
-- Harden text chunk embeddings storage invariants.
|
||||
|
||||
-- Give each embedding the same key as its chunk so the records map 1:1.
FOR $row IN (SELECT * FROM text_chunk_embedding) {
    -- Target id: text_chunk_embedding:<key of the linked chunk>.
    LET $target = type::thing('text_chunk_embedding', record::id($row.chunk_id));
    IF $target != $row.id {
        -- Write the row under the target id, then drop the old record.
        UPSERT $target CONTENT {
            chunk_id: $row.chunk_id,
            embedding: $row.embedding,
            user_id: $row.user_id,
            source_id: $row.source_id,
            created_at: $row.created_at,
            updated_at: $row.updated_at
        };
        DELETE $row.id;
    }
};
|
||||
|
||||
-- Recreate the chunk_id index as UNIQUE. It is removed first because
-- DEFINE INDEX IF NOT EXISTS leaves an already-defined index untouched.
REMOVE INDEX IF EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding;
|
||||
-- Runs after the re-key loop, which leaves one embedding record per chunk key.
DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id UNIQUE;
|
||||
+1
File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
|
||||
{"schemas":"--- original\n+++ modified\n@@ -242,7 +242,7 @@\n\n # Defines the schema for the 'text_content' table.\n\n-DEFINE TABLE IF NOT EXISTS text_content SCHEMALESS;\n+DEFINE TABLE IF NOT EXISTS text_content SCHEMAFULL;\n\n # Standard fields\n DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;\n@@ -254,10 +254,24 @@\n DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;\n # UrlInfo is a struct, store as object\n DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;\n+DEFINE FIELD IF NOT EXISTS url_info.url ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS url_info.title ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS url_info.image_id ON text_content TYPE string;\n+\n DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;\n DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;\n DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;\n\n+# FileInfo fields\n+DEFINE FIELD IF NOT EXISTS file_info.id ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.created_at ON text_content TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS file_info.updated_at ON text_content TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS file_info.sha256 ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.path ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.file_name ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.mime_type ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.user_id ON text_content TYPE string;\n+\n # Indexes based on query patterns\n DEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;\n","events":null}
|
||||
@@ -0,0 +1 @@
|
||||
{"schemas":"--- original\n+++ modified\n@@ -28,6 +28,7 @@\n # Add indexes based on query patterns (get_complete_conversation ownership check, get_user_conversations)\n DEFINE INDEX IF NOT EXISTS conversation_user_id_idx ON conversation FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS conversation_created_at_idx ON conversation FIELDS created_at; # For get_user_conversations ORDER BY\n+DEFINE INDEX IF NOT EXISTS conversation_user_updated_at_idx ON conversation FIELDS user_id, updated_at; # For sidebar conversation projection ORDER BY\n\n # Defines the schema for the 'file' table (used by FileInfo).\n\n","events":null}
|
||||
@@ -0,0 +1 @@
|
||||
{"schemas":"--- original\n+++ modified\n@@ -45,9 +45,8 @@\n DEFINE FIELD IF NOT EXISTS mime_type ON file TYPE string;\n DEFINE FIELD IF NOT EXISTS user_id ON file TYPE string;\n\n-# Indexes based on usage (get_by_sha, potentially user lookups)\n-# Using UNIQUE based on the logic in FileInfo::new to prevent duplicates\n-DEFINE INDEX IF NOT EXISTS file_sha256_idx ON file FIELDS sha256 UNIQUE;\n+# Indexes based on usage (get_by_sha scoped by user_id, user lookups)\n+DEFINE INDEX IF NOT EXISTS file_user_sha256_idx ON file FIELDS user_id, sha256 UNIQUE;\n DEFINE INDEX IF NOT EXISTS file_user_id_idx ON file FIELDS user_id;\n\n # Defines the schema for the 'ingestion_task' table (used by IngestionTask).\n","events":null}
|
||||
+1
@@ -0,0 +1 @@
|
||||
{"schemas":"--- original\n+++ modified\n@@ -68,7 +68,7 @@\n\n # Defines the schema for the 'knowledge_entity' table.\n\n-DEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMALESS;\n+DEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMAFULL;\n\n # Standard fields\n DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity TYPE datetime;\n@@ -90,6 +90,7 @@\n -- DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;\n+DEFINE INDEX IF NOT EXISTS knowledge_entity_user_source_idx ON knowledge_entity FIELDS user_id, source_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at;\n\n@@ -102,6 +103,7 @@\n DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity_embedding TYPE datetime;\n DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity_embedding TYPE datetime;\n DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity_embedding TYPE string;\n+DEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity_embedding TYPE string;\n\n -- Custom fields\n DEFINE FIELD IF NOT EXISTS entity_id ON knowledge_entity_embedding TYPE record<knowledge_entity>;\n@@ -109,8 +111,9 @@\n\n -- Indexes\n -- DEFINE INDEX IF NOT EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;\n-DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id;\n+DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id UNIQUE;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_user_id_idx ON knowledge_entity_embedding FIELDS user_id;\n+DEFINE INDEX IF NOT 
EXISTS knowledge_entity_embedding_source_id_idx ON knowledge_entity_embedding FIELDS source_id;\n\n # Defines the schema for the 'message' table.\n\n@@ -135,19 +138,17 @@\n # Defines the 'relates_to' edge table for KnowledgeRelationships.\n # Edges connect nodes, in this case knowledge_entity records.\n\n-# Define the edge table itself, enforcing connections between knowledge_entity records\n-# SCHEMAFULL requires all fields to be defined, maybe start with SCHEMALESS if metadata might vary\n-DEFINE TABLE IF NOT EXISTS relates_to SCHEMALESS TYPE RELATION FROM knowledge_entity TO knowledge_entity;\n+DEFINE TABLE IF NOT EXISTS relates_to SCHEMAFULL TYPE RELATION FROM knowledge_entity TO knowledge_entity;\n+\n+DEFINE FIELD IF NOT EXISTS in ON relates_to TYPE record<knowledge_entity>;\n+DEFINE FIELD IF NOT EXISTS out ON relates_to TYPE record<knowledge_entity>;\n\n-# Define the metadata field within the edge\n # RelationshipMetadata is a struct, store as object\n DEFINE FIELD IF NOT EXISTS metadata ON relates_to TYPE object;\n+DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;\n+DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;\n+DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;\n\n-# Optionally, define fields within the metadata object for stricter schema (requires SCHEMAFULL on table)\n-# DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;\n-# DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;\n-# DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;\n-\n # Add indexes based on query patterns (delete_relationships_by_source_id, get_knowledge_relationships)\n DEFINE INDEX IF NOT EXISTS relates_to_metadata_source_id_idx ON relates_to FIELDS metadata.source_id;\n DEFINE INDEX IF NOT EXISTS relates_to_metadata_user_id_idx ON relates_to FIELDS metadata.user_id;\n","events":null}
|
||||
+1
@@ -0,0 +1 @@
|
||||
{"schemas":"--- original\n+++ modified\n@@ -237,7 +237,7 @@\n\n -- Indexes\n -- DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\n-DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id;\n+DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id UNIQUE;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;\n\n","events":null}
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user