fix: migrations

schemafull
This commit is contained in:
Per Stark
2025-12-22 08:04:50 +01:00
parent 04faa38ee6
commit 30b8a65377
7 changed files with 116 additions and 2 deletions

View File

@@ -0,0 +1,18 @@
-- Migration: drop the vector (HNSW) indexes, full-text-search (FTS) indexes and
-- analyzers that earlier migrations defined. Every statement uses IF EXISTS, so
-- a definition that is already absent is skipped instead of raising an error.
-- Remove HNSW indexes from base tables (now created at runtime on *_embedding tables)
REMOVE INDEX IF EXISTS idx_embedding_entities ON knowledge_entity;
REMOVE INDEX IF EXISTS idx_embedding_chunks ON text_chunk;
-- Remove FTS indexes (now created at runtime via indexes.rs)
-- text_content: one index per searchable field
REMOVE INDEX IF EXISTS text_content_fts_text_idx ON text_content;
REMOVE INDEX IF EXISTS text_content_fts_category_idx ON text_content;
REMOVE INDEX IF EXISTS text_content_fts_context_idx ON text_content;
REMOVE INDEX IF EXISTS text_content_fts_file_name_idx ON text_content;
REMOVE INDEX IF EXISTS text_content_fts_url_idx ON text_content;
REMOVE INDEX IF EXISTS text_content_fts_url_title_idx ON text_content;
-- knowledge_entity: name and description
REMOVE INDEX IF EXISTS knowledge_entity_fts_name_idx ON knowledge_entity;
REMOVE INDEX IF EXISTS knowledge_entity_fts_description_idx ON knowledge_entity;
-- text_chunk: chunk text
REMOVE INDEX IF EXISTS text_chunk_fts_chunk_idx ON text_chunk;
-- Remove legacy analyzers (recreated at runtime with updated configuration)
-- NOTE(review): the analyzers are dropped only after the FTS indexes above;
-- presumably those indexes reference them, so keep this ordering - confirm.
REMOVE ANALYZER IF EXISTS app_default_fts_analyzer;
REMOVE ANALYZER IF EXISTS app_en_fts_analyzer;

View File

@@ -0,0 +1,23 @@
-- Copy embeddings from base tables to dedicated tables
-- This runs BEFORE the field removal migration, i.e. while the `embedding`
-- field is still present on text_chunk and knowledge_entity.
--
-- Only records whose embedding is set and non-empty are copied.
-- The source queries project just the fields that are copied instead of
-- SELECT *, so unrelated (potentially large) fields are not loaded.
-- A row is created only when the dedicated table does not already hold one for
-- the same source record, so running this against a database that already has
-- *_embedding rows (or running it twice) does not produce duplicates.

-- text_chunk -> text_chunk_embedding (linked through chunk_id)
FOR $chunk IN (
    SELECT id, embedding, user_id, source_id, created_at, updated_at
    FROM text_chunk
    WHERE embedding != NONE AND array::len(embedding) > 0
) {
    -- Existence check on chunk_id; LIMIT 1 because one match is enough.
    IF array::len((SELECT VALUE id FROM text_chunk_embedding WHERE chunk_id = $chunk.id LIMIT 1)) = 0 {
        CREATE text_chunk_embedding CONTENT {
            chunk_id: $chunk.id,
            embedding: $chunk.embedding,
            user_id: $chunk.user_id,
            source_id: $chunk.source_id,
            created_at: $chunk.created_at,
            updated_at: $chunk.updated_at
        };
    };
};

-- knowledge_entity -> knowledge_entity_embedding (linked through entity_id)
FOR $entity IN (
    SELECT id, embedding, user_id, created_at, updated_at
    FROM knowledge_entity
    WHERE embedding != NONE AND array::len(embedding) > 0
) {
    -- Existence check on entity_id; LIMIT 1 because one match is enough.
    IF array::len((SELECT VALUE id FROM knowledge_entity_embedding WHERE entity_id = $entity.id LIMIT 1)) = 0 {
        CREATE knowledge_entity_embedding CONTENT {
            entity_id: $entity.id,
            embedding: $entity.embedding,
            user_id: $entity.user_id,
            created_at: $entity.created_at,
            updated_at: $entity.updated_at
        };
    };
};

View File

@@ -0,0 +1,70 @@
-- Enforce SCHEMAFULL on all tables and define missing fields
-- Layout of this migration:
--   1. ingestion_task is redefined as SCHEMAFULL and all of its fields are declared.
--   2. Every other table is redefined as SCHEMAFULL (no field definitions here).
-- DEFINE TABLE uses OVERWRITE, which replaces an existing table definition;
-- DEFINE FIELD uses IF NOT EXISTS, which leaves an existing field definition as is.
-- NOTE(review): OVERWRITE restates only SCHEMAFULL; confirm that no table relied
-- on other clauses of its previous definition (e.g. permissions, table type).
-- 1. Define missing fields for ingestion_task (the table formerly named job)
DEFINE TABLE OVERWRITE ingestion_task SCHEMAFULL;
-- Core Fields
DEFINE FIELD IF NOT EXISTS id ON ingestion_task TYPE record<ingestion_task>;
DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE datetime DEFAULT time::now();
DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE datetime DEFAULT time::now();
DEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string;
-- State Machine Fields
-- `state` is restricted by ASSERT to the seven listed values; any other string is rejected.
DEFINE FIELD IF NOT EXISTS state ON ingestion_task TYPE string ASSERT $value IN ['Pending', 'Reserved', 'Processing', 'Succeeded', 'Failed', 'Cancelled', 'DeadLetter'];
DEFINE FIELD IF NOT EXISTS attempts ON ingestion_task TYPE int DEFAULT 0;
DEFINE FIELD IF NOT EXISTS max_attempts ON ingestion_task TYPE int DEFAULT 3;
DEFINE FIELD IF NOT EXISTS scheduled_at ON ingestion_task TYPE datetime DEFAULT time::now();
-- Lease bookkeeping: locked_at and worker_id are optional; lease_duration_secs defaults to 300.
DEFINE FIELD IF NOT EXISTS locked_at ON ingestion_task TYPE option<datetime>;
DEFINE FIELD IF NOT EXISTS lease_duration_secs ON ingestion_task TYPE int DEFAULT 300;
DEFINE FIELD IF NOT EXISTS worker_id ON ingestion_task TYPE option<string>;
-- Error details are all optional.
DEFINE FIELD IF NOT EXISTS error_code ON ingestion_task TYPE option<string>;
DEFINE FIELD IF NOT EXISTS error_message ON ingestion_task TYPE option<string>;
DEFINE FIELD IF NOT EXISTS last_error_at ON ingestion_task TYPE option<datetime>;
DEFINE FIELD IF NOT EXISTS priority ON ingestion_task TYPE int DEFAULT 0;
-- Content Payload (IngestionPayload Enum)
-- `content` is a required object; each variant key (Url, Text, File) is an optional object.
-- NOTE(review): the variants' child fields below are declared as required types
-- (string/datetime/object) under optional parents; verify on the targeted
-- SurrealDB version that a record holding only one variant still validates.
DEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object;
DEFINE FIELD IF NOT EXISTS content.Url ON ingestion_task TYPE option<object>;
DEFINE FIELD IF NOT EXISTS content.Text ON ingestion_task TYPE option<object>;
DEFINE FIELD IF NOT EXISTS content.File ON ingestion_task TYPE option<object>;
-- Content: Url Variant
DEFINE FIELD IF NOT EXISTS content.Url.url ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Url.context ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Url.category ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Url.user_id ON ingestion_task TYPE string;
-- Content: Text Variant
DEFINE FIELD IF NOT EXISTS content.Text.text ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Text.context ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Text.category ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.Text.user_id ON ingestion_task TYPE string;
-- Content: File Variant
DEFINE FIELD IF NOT EXISTS content.File.context ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.category ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.user_id ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info ON ingestion_task TYPE object;
-- Content: File.file_info (FileInfo Struct)
-- Note: file_info.id is declared as a string here, not as a record link.
DEFINE FIELD IF NOT EXISTS content.File.file_info.id ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info.created_at ON ingestion_task TYPE datetime;
DEFINE FIELD IF NOT EXISTS content.File.file_info.updated_at ON ingestion_task TYPE datetime;
DEFINE FIELD IF NOT EXISTS content.File.file_info.sha256 ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info.path ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info.file_name ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info.mime_type ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS content.File.file_info.user_id ON ingestion_task TYPE string;
-- 2. Enforce SCHEMAFULL on all other tables
-- No fields are defined for these tables in this migration; presumably their
-- field definitions come from the schema files - confirm before relying on it.
DEFINE TABLE OVERWRITE analytics SCHEMAFULL;
DEFINE TABLE OVERWRITE conversation SCHEMAFULL;
DEFINE TABLE OVERWRITE file SCHEMAFULL;
DEFINE TABLE OVERWRITE knowledge_entity SCHEMAFULL;
DEFINE TABLE OVERWRITE message SCHEMAFULL;
-- NOTE(review): relates_to looks like a graph relation table, yet no TYPE clause
-- is given here - confirm that is intended.
DEFINE TABLE OVERWRITE relates_to SCHEMAFULL;
DEFINE TABLE OVERWRITE scratchpad SCHEMAFULL;
DEFINE TABLE OVERWRITE system_settings SCHEMAFULL;
DEFINE TABLE OVERWRITE text_chunk SCHEMAFULL;
DEFINE TABLE OVERWRITE text_content SCHEMAFULL;
DEFINE TABLE OVERWRITE user SCHEMAFULL;

View File

@@ -1 +0,0 @@
{"schemas":"--- original\n+++ modified\n@@ -85,31 +85,30 @@\n\n DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity TYPE string;\n\n-# Indexes based on build_indexes and query patterns\n-# The INDEX definition correctly specifies the vector properties\n-# HNSW index now defined on knowledge_entity_embedding table for better memory usage \n-# DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\n+-- Indexes based on build_indexes and query patterns\n+-- HNSW index now defined on knowledge_entity_embedding table for better memory usage\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at;\n\n-# Defines the schema for the 'knowledge_entity_embedding' table.\n-# Separate table to optimize HNSW index creation memory usage\n+-- Defines the schema for the 'knowledge_entity_embedding' table.\n+-- Separate table to optimize HNSW index creation memory usage\n\n DEFINE TABLE IF NOT EXISTS knowledge_entity_embedding SCHEMAFULL;\n\n-# Standard fields\n+-- Standard fields\n DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity_embedding TYPE datetime;\n DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity_embedding TYPE datetime;\n DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity_embedding TYPE string;\n\n-# Custom fields\n+-- Custom fields\n DEFINE FIELD IF NOT EXISTS entity_id ON knowledge_entity_embedding TYPE record<knowledge_entity>;\n DEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity_embedding TYPE array<float>;\n\n-# Indexes\n-# DEFINE INDEX IF NOT 
EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;\n+-- Indexes\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_user_id_idx ON knowledge_entity_embedding FIELDS user_id;\n\n@@ -220,8 +219,8 @@\n DEFINE INDEX IF NOT EXISTS text_chunk_source_id_idx ON text_chunk FIELDS source_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_user_id_idx ON text_chunk FIELDS user_id;\n\n-# Defines the schema for the 'text_chunk_embedding' table.\n-# Separate table to optimize HNSW index creation memory usage\n+-- Defines the schema for the 'text_chunk_embedding' table.\n+-- Separate table to optimize HNSW index creation memory usage\n\n DEFINE TABLE IF NOT EXISTS text_chunk_embedding SCHEMAFULL;\n\n@@ -235,8 +234,8 @@\n DEFINE FIELD IF NOT EXISTS chunk_id ON text_chunk_embedding TYPE record<text_chunk>;\n DEFINE FIELD IF NOT EXISTS embedding ON text_chunk_embedding TYPE array<float>;\n\n-# Indexes\n-# DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\n+-- Indexes\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;\n","events":null}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -51,8 +51,11 @@ pub struct TextContentSearchResult {
/// URL-related metadata: the address, a title and an image identifier, all
/// held as owned strings.
///
/// Every field is marked `#[serde(default)]`, so a field that is missing from
/// the serialized input deserializes to an empty `String` instead of causing a
/// deserialization error.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct UrlInfo {
/// The URL itself; empty when absent from the input.
#[serde(default)]
pub url: String,
/// Title associated with the URL; empty when absent from the input.
#[serde(default)]
pub title: String,
/// Identifier of an associated image; empty when absent from the input.
// NOTE(review): presumably the id of a stored image/file record - confirm.
#[serde(default)]
pub image_id: String,
}