fix: replace manual embedding serialization with serde_json

- replace write!() loops with serde_json::to_string in 4 re-embedding methods
- standardize SQL building on write!() with proper error propagation
- eliminate manual f32 vector string building (intermediate String allocations via push_str(&format!(...)), and a loop whose write!() errors were silently dropped by unwrap_or_default())
This commit is contained in:
Per Stark
2026-05-27 14:13:19 +02:00
parent 9609880cff
commit 0acdba4f54
2 changed files with 36 additions and 49 deletions
+24 -30
View File
@@ -403,15 +403,10 @@ impl KnowledgeEntity {
// Add all update statements to the embedding table // Add all update statements to the embedding table
for (id, (embedding, user_id)) in new_embeddings { for (id, (embedding, user_id)) in new_embeddings {
let mut embedding_str = String::from("["); let embedding_str = serde_json::to_string(&embedding)
for (i, f) in embedding.iter().enumerate() { .map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?;
if i > 0 { write!(
embedding_str.push(','); transaction_query,
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
transaction_query.push_str(&format!(
"UPSERT type::thing('knowledge_entity_embedding', '{id}') SET \ "UPSERT type::thing('knowledge_entity_embedding', '{id}') SET \
entity_id = type::thing('knowledge_entity', '{id}'), \ entity_id = type::thing('knowledge_entity', '{id}'), \
embedding = {embedding}, \ embedding = {embedding}, \
@@ -420,14 +415,16 @@ impl KnowledgeEntity {
updated_at = time::now();", updated_at = time::now();",
id = id, id = id,
embedding = embedding_str, embedding = embedding_str,
user_id = user_id user_id = user_id,
)); )
.map_err(|e| AppError::InternalError(e.to_string()))?;
} }
transaction_query.push_str(&format!( write!(
"DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {};", transaction_query,
new_dimensions "DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {new_dimensions};",
)); )
.map_err(|e| AppError::InternalError(e.to_string()))?;
transaction_query.push_str("COMMIT TRANSACTION;"); transaction_query.push_str("COMMIT TRANSACTION;");
@@ -529,15 +526,10 @@ impl KnowledgeEntity {
let mut transaction_query = String::from("BEGIN TRANSACTION;"); let mut transaction_query = String::from("BEGIN TRANSACTION;");
for (id, (embedding, user_id)) in new_embeddings { for (id, (embedding, user_id)) in new_embeddings {
let mut embedding_str = String::from("["); let embedding_str = serde_json::to_string(&embedding)
for (i, f) in embedding.iter().enumerate() { .map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?;
if i > 0 { write!(
embedding_str.push(','); transaction_query,
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
transaction_query.push_str(&format!(
"CREATE type::thing('knowledge_entity_embedding', '{id}') SET \ "CREATE type::thing('knowledge_entity_embedding', '{id}') SET \
entity_id = type::thing('knowledge_entity', '{id}'), \ entity_id = type::thing('knowledge_entity', '{id}'), \
embedding = {embedding}, \ embedding = {embedding}, \
@@ -546,14 +538,16 @@ impl KnowledgeEntity {
updated_at = time::now();", updated_at = time::now();",
id = id, id = id,
embedding = embedding_str, embedding = embedding_str,
user_id = user_id user_id = user_id,
)); )
.map_err(|e| AppError::InternalError(e.to_string()))?;
} }
transaction_query.push_str(&format!( write!(
"DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {};", transaction_query,
new_dimensions "DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {new_dimensions};",
)); )
.map_err(|e| AppError::InternalError(e.to_string()))?;
transaction_query.push_str("COMMIT TRANSACTION;"); transaction_query.push_str("COMMIT TRANSACTION;");
+12 -19
View File
@@ -290,16 +290,8 @@ impl TextChunk {
let mut transaction_query = String::from("BEGIN TRANSACTION;"); let mut transaction_query = String::from("BEGIN TRANSACTION;");
for (id, (embedding, user_id, source_id)) in new_embeddings { for (id, (embedding, user_id, source_id)) in new_embeddings {
let mut embedding_str = String::from("["); let embedding_str = serde_json::to_string(&embedding)
for (i, f) in embedding.iter().enumerate() { .map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?;
if i > 0 {
embedding_str.push(',');
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
// Use the chunk id as the embedding record id to keep a 1:1 mapping
let embedding = embedding_str;
write!( write!(
&mut transaction_query, &mut transaction_query,
"UPSERT type::thing('text_chunk_embedding', '{id}') SET \ "UPSERT type::thing('text_chunk_embedding', '{id}') SET \
@@ -309,6 +301,10 @@ impl TextChunk {
user_id = '{user_id}', \ user_id = '{user_id}', \
created_at = IF created_at != NONE THEN created_at ELSE time::now() END, \ created_at = IF created_at != NONE THEN created_at ELSE time::now() END, \
updated_at = time::now();", updated_at = time::now();",
id = id,
embedding = embedding_str,
source_id = source_id,
user_id = user_id,
) )
.map_err(|e| AppError::InternalError(e.to_string()))?; .map_err(|e| AppError::InternalError(e.to_string()))?;
} }
@@ -409,15 +405,8 @@ impl TextChunk {
let mut transaction_query = String::from("BEGIN TRANSACTION;"); let mut transaction_query = String::from("BEGIN TRANSACTION;");
for (id, (embedding, user_id, source_id)) in new_embeddings { for (id, (embedding, user_id, source_id)) in new_embeddings {
let mut embedding_str = String::from("["); let embedding_str = serde_json::to_string(&embedding)
for (i, f) in embedding.iter().enumerate() { .map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?;
if i > 0 {
embedding_str.push(',');
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
let embedding = embedding_str;
write!( write!(
&mut transaction_query, &mut transaction_query,
"CREATE type::thing('text_chunk_embedding', '{id}') SET \ "CREATE type::thing('text_chunk_embedding', '{id}') SET \
@@ -427,6 +416,10 @@ impl TextChunk {
user_id = '{user_id}', \ user_id = '{user_id}', \
created_at = time::now(), \ created_at = time::now(), \
updated_at = time::now();", updated_at = time::now();",
id = id,
embedding = embedding_str,
source_id = source_id,
user_id = user_id,
) )
.map_err(|e| AppError::InternalError(e.to_string()))?; .map_err(|e| AppError::InternalError(e.to_string()))?;
} }