fix: replace manual embedding serialization with serde_json

- replaced write!() loops with serde_json::to_string in 4 re-embedding methods
- standardized SQL building to use write!() with proper error propagation
- eliminated manual f32 vector string building (wasted allocations and error-prone hand-written loops)
This commit is contained in:
Per Stark
2026-05-27 14:13:19 +02:00
parent 9609880cff
commit 0acdba4f54
2 changed files with 36 additions and 49 deletions
+24 -30
View File
@@ -403,15 +403,10 @@ impl KnowledgeEntity {
// Add all update statements to the embedding table
for (id, (embedding, user_id)) in new_embeddings {
let mut embedding_str = String::from("[");
for (i, f) in embedding.iter().enumerate() {
if i > 0 {
embedding_str.push(',');
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
transaction_query.push_str(&format!(
let embedding_str = serde_json::to_string(&embedding)
.map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?;
write!(
transaction_query,
"UPSERT type::thing('knowledge_entity_embedding', '{id}') SET \
entity_id = type::thing('knowledge_entity', '{id}'), \
embedding = {embedding}, \
@@ -420,14 +415,16 @@ impl KnowledgeEntity {
updated_at = time::now();",
id = id,
embedding = embedding_str,
user_id = user_id
));
user_id = user_id,
)
.map_err(|e| AppError::InternalError(e.to_string()))?;
}
transaction_query.push_str(&format!(
"DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {};",
new_dimensions
));
write!(
transaction_query,
"DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {new_dimensions};",
)
.map_err(|e| AppError::InternalError(e.to_string()))?;
transaction_query.push_str("COMMIT TRANSACTION;");
@@ -529,15 +526,10 @@ impl KnowledgeEntity {
let mut transaction_query = String::from("BEGIN TRANSACTION;");
for (id, (embedding, user_id)) in new_embeddings {
let mut embedding_str = String::from("[");
for (i, f) in embedding.iter().enumerate() {
if i > 0 {
embedding_str.push(',');
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
transaction_query.push_str(&format!(
let embedding_str = serde_json::to_string(&embedding)
.map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?;
write!(
transaction_query,
"CREATE type::thing('knowledge_entity_embedding', '{id}') SET \
entity_id = type::thing('knowledge_entity', '{id}'), \
embedding = {embedding}, \
@@ -546,14 +538,16 @@ impl KnowledgeEntity {
updated_at = time::now();",
id = id,
embedding = embedding_str,
user_id = user_id
));
user_id = user_id,
)
.map_err(|e| AppError::InternalError(e.to_string()))?;
}
transaction_query.push_str(&format!(
"DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {};",
new_dimensions
));
write!(
transaction_query,
"DEFINE INDEX OVERWRITE idx_embedding_knowledge_entity_embedding ON TABLE knowledge_entity_embedding FIELDS embedding HNSW DIMENSION {new_dimensions};",
)
.map_err(|e| AppError::InternalError(e.to_string()))?;
transaction_query.push_str("COMMIT TRANSACTION;");
+12 -19
View File
@@ -290,16 +290,8 @@ impl TextChunk {
let mut transaction_query = String::from("BEGIN TRANSACTION;");
for (id, (embedding, user_id, source_id)) in new_embeddings {
let mut embedding_str = String::from("[");
for (i, f) in embedding.iter().enumerate() {
if i > 0 {
embedding_str.push(',');
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
// Use the chunk id as the embedding record id to keep a 1:1 mapping
let embedding = embedding_str;
let embedding_str = serde_json::to_string(&embedding)
.map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?;
write!(
&mut transaction_query,
"UPSERT type::thing('text_chunk_embedding', '{id}') SET \
@@ -309,6 +301,10 @@ impl TextChunk {
user_id = '{user_id}', \
created_at = IF created_at != NONE THEN created_at ELSE time::now() END, \
updated_at = time::now();",
id = id,
embedding = embedding_str,
source_id = source_id,
user_id = user_id,
)
.map_err(|e| AppError::InternalError(e.to_string()))?;
}
@@ -409,15 +405,8 @@ impl TextChunk {
let mut transaction_query = String::from("BEGIN TRANSACTION;");
for (id, (embedding, user_id, source_id)) in new_embeddings {
let mut embedding_str = String::from("[");
for (i, f) in embedding.iter().enumerate() {
if i > 0 {
embedding_str.push(',');
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
let embedding = embedding_str;
let embedding_str = serde_json::to_string(&embedding)
.map_err(|e| AppError::InternalError(format!("embedding serialization failed: {e}")))?;
write!(
&mut transaction_query,
"CREATE type::thing('text_chunk_embedding', '{id}') SET \
@@ -427,6 +416,10 @@ impl TextChunk {
user_id = '{user_id}', \
created_at = time::now(), \
updated_at = time::now();",
id = id,
embedding = embedding_str,
source_id = source_id,
user_id = user_id,
)
.map_err(|e| AppError::InternalError(e.to_string()))?;
}