mirror of
https://github.com/perstarkse/minne.git
synced 2026-03-24 18:31:45 +01:00
fix: migrations
schemafull
This commit is contained in:
@@ -0,0 +1,18 @@
|
||||
-- Remove HNSW indexes from base tables (now created at runtime on *_embedding tables)
|
||||
REMOVE INDEX IF EXISTS idx_embedding_entities ON knowledge_entity;
|
||||
REMOVE INDEX IF EXISTS idx_embedding_chunks ON text_chunk;
|
||||
|
||||
-- Remove FTS indexes (now created at runtime via indexes.rs)
|
||||
REMOVE INDEX IF EXISTS text_content_fts_text_idx ON text_content;
|
||||
REMOVE INDEX IF EXISTS text_content_fts_category_idx ON text_content;
|
||||
REMOVE INDEX IF EXISTS text_content_fts_context_idx ON text_content;
|
||||
REMOVE INDEX IF EXISTS text_content_fts_file_name_idx ON text_content;
|
||||
REMOVE INDEX IF EXISTS text_content_fts_url_idx ON text_content;
|
||||
REMOVE INDEX IF EXISTS text_content_fts_url_title_idx ON text_content;
|
||||
REMOVE INDEX IF EXISTS knowledge_entity_fts_name_idx ON knowledge_entity;
|
||||
REMOVE INDEX IF EXISTS knowledge_entity_fts_description_idx ON knowledge_entity;
|
||||
REMOVE INDEX IF EXISTS text_chunk_fts_chunk_idx ON text_chunk;
|
||||
|
||||
-- Remove legacy analyzers (recreated at runtime with updated configuration)
|
||||
REMOVE ANALYZER IF EXISTS app_default_fts_analyzer;
|
||||
REMOVE ANALYZER IF EXISTS app_en_fts_analyzer;
|
||||
@@ -0,0 +1,23 @@
|
||||
-- Copy embeddings from base tables to dedicated tables
|
||||
-- This runs BEFORE the field removal migration
|
||||
|
||||
FOR $chunk IN (SELECT * FROM text_chunk WHERE embedding != NONE AND array::len(embedding) > 0) {
|
||||
CREATE text_chunk_embedding CONTENT {
|
||||
chunk_id: $chunk.id,
|
||||
embedding: $chunk.embedding,
|
||||
user_id: $chunk.user_id,
|
||||
source_id: $chunk.source_id,
|
||||
created_at: $chunk.created_at,
|
||||
updated_at: $chunk.updated_at
|
||||
};
|
||||
};
|
||||
|
||||
FOR $entity IN (SELECT * FROM knowledge_entity WHERE embedding != NONE AND array::len(embedding) > 0) {
|
||||
CREATE knowledge_entity_embedding CONTENT {
|
||||
entity_id: $entity.id,
|
||||
embedding: $entity.embedding,
|
||||
user_id: $entity.user_id,
|
||||
created_at: $entity.created_at,
|
||||
updated_at: $entity.updated_at
|
||||
};
|
||||
};
|
||||
70
common/migrations/20251231_enforce_schemafull.surql
Normal file
70
common/migrations/20251231_enforce_schemafull.surql
Normal file
@@ -0,0 +1,70 @@
|
||||
-- Enforce SCHEMAFULL on all tables and define missing fields
|
||||
|
||||
-- 1. Define missing fields for ingestion_task (formerly job, but now ingestion_task)
|
||||
DEFINE TABLE OVERWRITE ingestion_task SCHEMAFULL;
|
||||
|
||||
-- Core Fields
|
||||
DEFINE FIELD IF NOT EXISTS id ON ingestion_task TYPE record<ingestion_task>;
|
||||
DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE datetime DEFAULT time::now();
|
||||
DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE datetime DEFAULT time::now();
|
||||
DEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string;
|
||||
|
||||
-- State Machine Fields
|
||||
DEFINE FIELD IF NOT EXISTS state ON ingestion_task TYPE string ASSERT $value IN ['Pending', 'Reserved', 'Processing', 'Succeeded', 'Failed', 'Cancelled', 'DeadLetter'];
|
||||
DEFINE FIELD IF NOT EXISTS attempts ON ingestion_task TYPE int DEFAULT 0;
|
||||
DEFINE FIELD IF NOT EXISTS max_attempts ON ingestion_task TYPE int DEFAULT 3;
|
||||
DEFINE FIELD IF NOT EXISTS scheduled_at ON ingestion_task TYPE datetime DEFAULT time::now();
|
||||
DEFINE FIELD IF NOT EXISTS locked_at ON ingestion_task TYPE option<datetime>;
|
||||
DEFINE FIELD IF NOT EXISTS lease_duration_secs ON ingestion_task TYPE int DEFAULT 300;
|
||||
DEFINE FIELD IF NOT EXISTS worker_id ON ingestion_task TYPE option<string>;
|
||||
DEFINE FIELD IF NOT EXISTS error_code ON ingestion_task TYPE option<string>;
|
||||
DEFINE FIELD IF NOT EXISTS error_message ON ingestion_task TYPE option<string>;
|
||||
DEFINE FIELD IF NOT EXISTS last_error_at ON ingestion_task TYPE option<datetime>;
|
||||
DEFINE FIELD IF NOT EXISTS priority ON ingestion_task TYPE int DEFAULT 0;
|
||||
|
||||
-- Content Payload (IngestionPayload Enum)
|
||||
DEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object;
|
||||
DEFINE FIELD IF NOT EXISTS content.Url ON ingestion_task TYPE option<object>;
|
||||
DEFINE FIELD IF NOT EXISTS content.Text ON ingestion_task TYPE option<object>;
|
||||
DEFINE FIELD IF NOT EXISTS content.File ON ingestion_task TYPE option<object>;
|
||||
|
||||
-- Content: Url Variant
|
||||
DEFINE FIELD IF NOT EXISTS content.Url.url ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.Url.context ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.Url.category ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.Url.user_id ON ingestion_task TYPE string;
|
||||
|
||||
-- Content: Text Variant
|
||||
DEFINE FIELD IF NOT EXISTS content.Text.text ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.Text.context ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.Text.category ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.Text.user_id ON ingestion_task TYPE string;
|
||||
|
||||
-- Content: File Variant
|
||||
DEFINE FIELD IF NOT EXISTS content.File.context ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.File.category ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.File.user_id ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.File.file_info ON ingestion_task TYPE object;
|
||||
|
||||
-- Content: File.file_info (FileInfo Struct)
|
||||
DEFINE FIELD IF NOT EXISTS content.File.file_info.id ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.File.file_info.created_at ON ingestion_task TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS content.File.file_info.updated_at ON ingestion_task TYPE datetime;
|
||||
DEFINE FIELD IF NOT EXISTS content.File.file_info.sha256 ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.File.file_info.path ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.File.file_info.file_name ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.File.file_info.mime_type ON ingestion_task TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS content.File.file_info.user_id ON ingestion_task TYPE string;
|
||||
|
||||
-- 2. Enforce SCHEMAFULL on all other tables
|
||||
DEFINE TABLE OVERWRITE analytics SCHEMAFULL;
|
||||
DEFINE TABLE OVERWRITE conversation SCHEMAFULL;
|
||||
DEFINE TABLE OVERWRITE file SCHEMAFULL;
|
||||
DEFINE TABLE OVERWRITE knowledge_entity SCHEMAFULL;
|
||||
DEFINE TABLE OVERWRITE message SCHEMAFULL;
|
||||
DEFINE TABLE OVERWRITE relates_to SCHEMAFULL;
|
||||
DEFINE TABLE OVERWRITE scratchpad SCHEMAFULL;
|
||||
DEFINE TABLE OVERWRITE system_settings SCHEMAFULL;
|
||||
DEFINE TABLE OVERWRITE text_chunk SCHEMAFULL;
|
||||
DEFINE TABLE OVERWRITE text_content SCHEMAFULL;
|
||||
DEFINE TABLE OVERWRITE user SCHEMAFULL;
|
||||
@@ -1 +0,0 @@
|
||||
{"schemas":"--- original\n+++ modified\n@@ -85,31 +85,30 @@\n\n DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity TYPE string;\n\n-# Indexes based on build_indexes and query patterns\n-# The INDEX definition correctly specifies the vector properties\n-# HNSW index now defined on knowledge_entity_embedding table for better memory usage \n-# DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\n+-- Indexes based on build_indexes and query patterns\n+-- HNSW index now defined on knowledge_entity_embedding table for better memory usage\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at;\n\n-# Defines the schema for the 'knowledge_entity_embedding' table.\n-# Separate table to optimize HNSW index creation memory usage\n+-- Defines the schema for the 'knowledge_entity_embedding' table.\n+-- Separate table to optimize HNSW index creation memory usage\n\n DEFINE TABLE IF NOT EXISTS knowledge_entity_embedding SCHEMAFULL;\n\n-# Standard fields\n+-- Standard fields\n DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity_embedding TYPE datetime;\n DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity_embedding TYPE datetime;\n DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity_embedding TYPE string;\n\n-# Custom fields\n+-- Custom fields\n DEFINE FIELD IF NOT EXISTS entity_id ON knowledge_entity_embedding TYPE record<knowledge_entity>;\n DEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity_embedding TYPE array<float>;\n\n-# Indexes\n-# DEFINE INDEX IF NOT EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;\n+-- Indexes\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_user_id_idx ON knowledge_entity_embedding FIELDS user_id;\n\n@@ -220,8 +219,8 @@\n DEFINE INDEX IF NOT EXISTS text_chunk_source_id_idx ON text_chunk FIELDS source_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_user_id_idx ON text_chunk FIELDS user_id;\n\n-# Defines the schema for the 'text_chunk_embedding' table.\n-# Separate table to optimize HNSW index creation memory usage\n+-- Defines the schema for the 'text_chunk_embedding' table.\n+-- Separate table to optimize HNSW index creation memory usage\n\n DEFINE TABLE IF NOT EXISTS text_chunk_embedding SCHEMAFULL;\n\n@@ -235,8 +234,8 @@\n DEFINE FIELD IF NOT EXISTS chunk_id ON text_chunk_embedding TYPE record<text_chunk>;\n DEFINE FIELD IF NOT EXISTS embedding ON text_chunk_embedding TYPE array<float>;\n\n-# Indexes\n-# DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\n+-- Indexes\n+-- DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;\n","events":null}
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -51,8 +51,11 @@ pub struct TextContentSearchResult {
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
|
||||
pub struct UrlInfo {
|
||||
#[serde(default)]
|
||||
pub url: String,
|
||||
#[serde(default)]
|
||||
pub title: String,
|
||||
#[serde(default)]
|
||||
pub image_id: String,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user