mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-30 03:10:45 +02:00
chore: harden text chunk embeddings and text content storage
Align text chunk embedding identity with knowledge entities (chunk id as record id, UNIQUE chunk_id index, dimension validation), make cascade deletes transactional, and improve text content patch/search reliability with tests.
This commit is contained in:
@@ -0,0 +1,21 @@
|
||||
-- Harden text chunk embeddings storage invariants.
|
||||
|
||||
-- Move every embedding row onto a record id derived from its chunk id, so
-- that one chunk corresponds to exactly one embedding record.
FOR $row IN (
    SELECT id, chunk_id, embedding, user_id, source_id, created_at, updated_at
    FROM text_chunk_embedding
) {
    -- Target id: text_chunk_embedding:<key part of this row's chunk_id>.
    LET $target = type::thing('text_chunk_embedding', record::id($row.chunk_id));

    -- Row is already stored under its target id: nothing to do.
    IF $row.id = $target {
        CONTINUE;
    };

    -- Write the row's fields under the target id (CONTENT replaces whatever
    -- is stored there), then remove the record under the old id.
    UPSERT $target CONTENT {
        chunk_id: $row.chunk_id,
        embedding: $row.embedding,
        user_id: $row.user_id,
        source_id: $row.source_id,
        created_at: $row.created_at,
        updated_at: $row.updated_at
    };
    DELETE $row.id;
};
|
||||
|
||||
-- Drop any existing index with this name first. The DEFINE below uses
-- IF NOT EXISTS, so it would be a no-op while an index of the same name is
-- still present (presumably the earlier non-UNIQUE definition — confirm
-- against the schema history).
REMOVE INDEX IF EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding;
|
||||
-- Recreate the index on chunk_id, this time declared UNIQUE.
DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id UNIQUE;
|
||||
+1
@@ -0,0 +1 @@
|
||||
{"schemas":"--- original\n+++ modified\n@@ -237,7 +237,7 @@\n\n -- Indexes\n -- DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\n-DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id;\n+DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id UNIQUE;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;\n\n","events":null}
|
||||
Reference in New Issue
Block a user