From 0acdba4f54d10976cfaffae2f7094c4e3983520a Mon Sep 17 00:00:00 2001 From: Per Stark Date: Wed, 27 May 2026 14:13:19 +0200 Subject: [PATCH] fix: replace manual embedding serialization with serde_json - replaced write!() loops with serde_json::to_string in 4 re-embedding methods - standardized SQL building to use write!() with proper error propagation - eliminates manual f32 vector string building (memory waste + loop risk) --- common/src/storage/types/knowledge_entity.rs | 54 +++++++++----------- common/src/storage/types/text_chunk.rs | 31 +++++------ 2 files changed, 36 insertions(+), 49 deletions(-) diff --git a/common/src/storage/types/knowledge_entity.rs b/common/src/storage/types/knowledge_entity.rs index e7b0076..7c4e860 100644 --- a/common/src/storage/types/knowledge_entity.rs +++ b/common/src/storage/types/knowledge_entity.rs @@ -403,15 +403,10 @@ impl KnowledgeEntity { // Add all update statements to the embedding table for (id, (embedding, user_id)) in new_embeddings { - let mut embedding_str = String::from("["); - for (i, f) in embedding.iter().enumerate() { - if i > 0 { - embedding_str.push(','); - } - write!(embedding_str, "{f}").unwrap_or_default(); - } - embedding_str.push(']'); - transaction_query.push_str(&format!( + let embedding_str = serde_json::to_string(&embedding) + .map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?; + write!( + transaction_query, "UPSERT type::thing('knowledge_entity_embedding', '{id}') SET \ entity_id = type::thing('knowledge_entity', '{id}'), \ embedding = {embedding}, \ @@ -420,14 +415,16 @@ impl KnowledgeEntity { updated_at = time::now();", id = id, embedding = embedding_str, - user_id = user_id - )); + user_id = user_id, + ) + .map_err(|e| AppError::InternalError(e.to_string()))?; } - transaction_query.push_str(&format!( - "DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {};", - new_dimensions - )); + write!( + transaction_query, + "DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {new_dimensions};", + ) + .map_err(|e| AppError::InternalError(e.to_string()))?; transaction_query.push_str("COMMIT TRANSACTION;"); @@ -529,15 +526,10 @@ impl KnowledgeEntity { let mut transaction_query = String::from("BEGIN TRANSACTION;"); for (id, (embedding, user_id)) in new_embeddings { - let mut embedding_str = String::from("["); - for (i, f) in embedding.iter().enumerate() { - if i > 0 { - embedding_str.push(','); - } - write!(embedding_str, "{f}").unwrap_or_default(); - } - embedding_str.push(']'); - transaction_query.push_str(&format!( + let embedding_str = serde_json::to_string(&embedding) + .map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?; + write!( + transaction_query, "CREATE type::thing('knowledge_entity_embedding', '{id}') SET \ entity_id = type::thing('knowledge_entity', '{id}'), \ embedding = {embedding}, \ @@ -546,14 +538,16 @@ impl KnowledgeEntity { updated_at = time::now();", id = id, embedding = embedding_str, - user_id = user_id - )); + user_id = user_id, + ) + .map_err(|e| AppError::InternalError(e.to_string()))?; } - transaction_query.push_str(&format!( - "DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {};", - new_dimensions - )); + write!( + transaction_query, + "DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {new_dimensions};", + ) + .map_err(|e| AppError::InternalError(e.to_string()))?; transaction_query.push_str("COMMIT TRANSACTION;"); diff --git a/common/src/storage/types/text_chunk.rs b/common/src/storage/types/text_chunk.rs index fd04a24..2868dc5 100644 --- a/common/src/storage/types/text_chunk.rs +++ b/common/src/storage/types/text_chunk.rs @@ -290,16 +290,8 @@ impl TextChunk { let mut transaction_query = String::from("BEGIN TRANSACTION;"); for (id, (embedding, user_id, source_id)) in new_embeddings { - let mut embedding_str = String::from("["); - for (i, f) in embedding.iter().enumerate() { - if i > 0 { - embedding_str.push(','); - } - write!(embedding_str, "{f}").unwrap_or_default(); - } - embedding_str.push(']'); - // Use the chunk id as the embedding record id to keep a 1:1 mapping - let embedding = embedding_str; + let embedding_str = serde_json::to_string(&embedding) + .map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?; write!( &mut transaction_query, "UPSERT type::thing('text_chunk_embedding', '{id}') SET \ @@ -309,6 +301,10 @@ impl TextChunk { user_id = '{user_id}', \ created_at = IF created_at != NONE THEN created_at ELSE time::now() END, \ updated_at = time::now();", + id = id, + embedding = embedding_str, + source_id = source_id, + user_id = user_id, ) .map_err(|e| AppError::InternalError(e.to_string()))?; } @@ -409,15 +405,8 @@ impl TextChunk { let mut transaction_query = String::from("BEGIN TRANSACTION;"); for (id, (embedding, user_id, source_id)) in new_embeddings { - let mut embedding_str = String::from("["); - for (i, f) in embedding.iter().enumerate() { - if i > 0 { - embedding_str.push(','); - } - write!(embedding_str, "{f}").unwrap_or_default(); - } - embedding_str.push(']'); - let embedding = embedding_str; + let embedding_str = serde_json::to_string(&embedding) + .map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?; write!( &mut transaction_query, "CREATE type::thing('text_chunk_embedding', '{id}') SET \ @@ -427,6 +416,10 @@ impl TextChunk { user_id = '{user_id}', \ created_at = time::now(), \ updated_at = time::now();", + id = id, + embedding = embedding_str, + source_id = source_id, + user_id = user_id, ) .map_err(|e| AppError::InternalError(e.to_string()))?; }