chore: harden text chunk embeddings and text content storage

Align text chunk embedding identity with knowledge entities (chunk id as record id, UNIQUE chunk_id index, dimension validation), make cascade deletes transactional, and improve text content patch/search reliability with tests.
This commit is contained in:
Per Stark
2026-05-28 22:03:01 +02:00
parent 5724f11dc1
commit ba8c36da1e
6 changed files with 445 additions and 133 deletions
@@ -0,0 +1,21 @@
-- Migration: give every text chunk embedding a stable 1:1 identity with its chunk.
-- Each row is moved to the record id text_chunk_embedding:<key of chunk_id>,
-- so the embedding's record key mirrors the key of the chunk it references.
FOR $row IN (
    SELECT id, chunk_id, embedding, user_id, source_id, created_at, updated_at
    FROM text_chunk_embedding
) {
    -- Target id: same table, keyed by the key part of the referenced chunk record.
    LET $target = type::thing('text_chunk_embedding', record::id($row.chunk_id));

    -- Rows already stored under their target id are left untouched.
    IF $row.id != $target {
        -- Write the copy before deleting the old row, so the data is present
        -- under at least one id at every step of the loop body.
        UPSERT $target CONTENT {
            created_at: $row.created_at,
            updated_at: $row.updated_at,
            user_id: $row.user_id,
            source_id: $row.source_id,
            chunk_id: $row.chunk_id,
            embedding: $row.embedding
        };
        DELETE $row.id;
    }
};
-- Replace the chunk_id index with a UNIQUE one, so no two embedding rows can share a chunk_id.
-- The REMOVE runs first because DEFINE ... IF NOT EXISTS would leave an index that
-- already exists under this name (the earlier non-unique definition) unchanged.
REMOVE INDEX IF EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding;
DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id UNIQUE;
@@ -0,0 +1 @@
{"schemas":"--- original\n+++ modified\n@@ -237,7 +237,7 @@\n\n -- Indexes\n -- DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\n-DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id;\n+DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id UNIQUE;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;\n\n","events":null}