fix: edge case when deleting content

nit
This commit is contained in:
Per Stark
2026-01-17 23:31:16 +01:00
parent a3bc6fba98
commit f2fa5bbbcc
4 changed files with 102 additions and 70 deletions

View File

@@ -171,6 +171,9 @@ impl KnowledgeEntity {
source_id: &str,
db_client: &SurrealDbClient,
) -> Result<(), AppError> {
// Delete embeddings first, while we can still look them up via the entity's source_id
KnowledgeEntityEmbedding::delete_by_source_id(source_id, db_client).await?;
let query = format!(
"DELETE {} WHERE source_id = '{}'",
Self::table_name(),
@@ -224,7 +227,7 @@ impl KnowledgeEntity {
) -> Result<Vec<KnowledgeEntityVectorResult>, AppError> {
#[derive(Deserialize)]
struct Row {
entity_id: KnowledgeEntity,
entity_id: Option<KnowledgeEntity>,
score: f32,
}
@@ -257,9 +260,11 @@ impl KnowledgeEntity {
Ok(rows
.into_iter()
.map(|r| KnowledgeEntityVectorResult {
entity: r.entity_id,
score: r.score,
.filter_map(|r| {
r.entity_id.map(|entity| KnowledgeEntityVectorResult {
entity,
score: r.score,
})
})
.collect())
}
@@ -914,4 +919,50 @@ mod tests {
assert_eq!(results[0].entity.id, e2.id);
assert_eq!(results[1].entity.id, e1.id);
}
/// Regression test for orphaned embeddings.
///
/// Stores an entity together with its embedding, deletes only the entity
/// record to leave an orphan behind, and then checks that `vector_search`
/// still succeeds and returns no results.
#[tokio::test]
async fn test_vector_search_with_orphaned_embedding() {
    // A random database name gives every run its own in-memory store.
    let database = Uuid::new_v4().to_string();
    let db = SurrealDbClient::memory("test_ns_orphan", &database)
        .await
        .expect("Failed to start in-memory surrealdb");
    db.apply_migrations()
        .await
        .expect("Failed to apply migrations");

    // The index is redefined with length 3 to match the 3-element vectors used below.
    KnowledgeEntityEmbedding::redefine_hnsw_index(&db, 3)
        .await
        .expect("Failed to redefine index length");

    let user_id = String::from("user");
    let entity = KnowledgeEntity::new(
        String::from("src"),
        String::from("orphan"),
        String::from("orphan desc"),
        KnowledgeEntityType::Document,
        None,
        user_id.clone(),
    );
    KnowledgeEntity::store_with_embedding(entity.clone(), vec![0.1, 0.2, 0.3], &db)
        .await
        .expect("store entity with embedding");

    // Remove only the entity record by hand so that an orphan is created.
    let delete_entity = format!(
        "DELETE type::thing('knowledge_entity', '{id}')",
        id = entity.id
    );
    db.client
        .query(delete_entity)
        .await
        .expect("delete entity");

    // Searching with the very same vector must neither fail nor yield a hit.
    let hits = KnowledgeEntity::vector_search(3, vec![0.1, 0.2, 0.3], &db, &user_id)
        .await
        .expect("search should succeed even with orphans");
    assert!(
        hits.is_empty(),
        "Should return empty result for orphan, got: {:?}",
        hits
    );
}
}

View File

@@ -44,6 +44,9 @@ impl TextChunk {
source_id: &str,
db_client: &SurrealDbClient,
) -> Result<(), AppError> {
// Delete embeddings first
TextChunkEmbedding::delete_by_source_id(source_id, db_client).await?;
let query = format!(
"DELETE {} WHERE source_id = '{}'",
Self::table_name(),
@@ -102,7 +105,7 @@ impl TextChunk {
#[allow(clippy::missing_docs_in_private_items)]
#[derive(Deserialize)]
struct Row {
chunk_id: TextChunk,
chunk_id: Option<TextChunk>,
score: f32,
}
@@ -134,9 +137,11 @@ impl TextChunk {
Ok(rows
.into_iter()
.map(|r| TextChunkSearchResult {
chunk: r.chunk_id,
score: r.score,
.filter_map(|r| {
r.chunk_id.map(|chunk| TextChunkSearchResult {
chunk,
score: r.score,
})
})
.collect())
}

View File

@@ -102,44 +102,19 @@ impl TextChunkEmbedding {
/// Delete all embeddings that belong to chunks with a given `source_id`
///
/// This uses a subquery to the `text_chunk` table:
///
/// DELETE FROM text_chunk_embedding
/// WHERE chunk_id IN (SELECT id FROM text_chunk WHERE source_id = $source_id)
/// This uses the denormalized `source_id` on the embedding table.
pub async fn delete_by_source_id(
source_id: &str,
db: &SurrealDbClient,
) -> Result<(), AppError> {
#[allow(clippy::missing_docs_in_private_items)]
#[derive(Deserialize)]
struct IdRow {
id: RecordId,
}
let ids_query = format!(
"SELECT id FROM {} WHERE source_id = $source_id",
TextChunk::table_name()
);
let mut res = db
.client
.query(ids_query)
.bind(("source_id", source_id.to_owned()))
.await
.map_err(AppError::Database)?;
let ids: Vec<IdRow> = res.take(0).map_err(AppError::Database)?;
if ids.is_empty() {
return Ok(());
}
let delete_query = format!(
"DELETE FROM {} WHERE chunk_id IN $chunk_ids",
let query = format!(
"DELETE FROM {} WHERE source_id = $source_id",
Self::table_name()
);
db.client
.query(delete_query)
.bind((
"chunk_ids",
ids.into_iter().map(|row| row.id).collect::<Vec<_>>(),
))
.query(query)
.bind(("source_id", source_id.to_owned()))
.await
.map_err(AppError::Database)?
.check()