fix: edge case when deleting content

nit
This commit is contained in:
Per Stark
2026-01-17 23:31:16 +01:00
parent a3bc6fba98
commit f2fa5bbbcc
4 changed files with 102 additions and 70 deletions

View File

@@ -171,6 +171,9 @@ impl KnowledgeEntity {
source_id: &str,
db_client: &SurrealDbClient,
) -> Result<(), AppError> {
// Delete embeddings first, while we can still look them up via the entity's source_id
KnowledgeEntityEmbedding::delete_by_source_id(source_id, db_client).await?;
let query = format!(
"DELETE {} WHERE source_id = '{}'",
Self::table_name(),
@@ -224,7 +227,7 @@ impl KnowledgeEntity {
) -> Result<Vec<KnowledgeEntityVectorResult>, AppError> {
#[derive(Deserialize)]
struct Row {
entity_id: KnowledgeEntity,
entity_id: Option<KnowledgeEntity>,
score: f32,
}
@@ -257,9 +260,11 @@ impl KnowledgeEntity {
Ok(rows
.into_iter()
.map(|r| KnowledgeEntityVectorResult {
entity: r.entity_id,
score: r.score,
.filter_map(|r| {
r.entity_id.map(|entity| KnowledgeEntityVectorResult {
entity,
score: r.score,
})
})
.collect())
}
@@ -914,4 +919,50 @@ mod tests {
assert_eq!(results[0].entity.id, e2.id);
assert_eq!(results[1].entity.id, e1.id);
}
#[tokio::test]
async fn test_vector_search_with_orphaned_embedding() {
let namespace = "test_ns_orphan";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("Failed to start in-memory surrealdb");
db.apply_migrations()
.await
.expect("Failed to apply migrations");
KnowledgeEntityEmbedding::redefine_hnsw_index(&db, 3)
.await
.expect("Failed to redefine index length");
let user_id = "user".to_string();
let source_id = "src".to_string();
let entity = KnowledgeEntity::new(
source_id.clone(),
"orphan".to_string(),
"orphan desc".to_string(),
KnowledgeEntityType::Document,
None,
user_id.clone(),
);
KnowledgeEntity::store_with_embedding(entity.clone(), vec![0.1, 0.2, 0.3], &db)
.await
.expect("store entity with embedding");
// Manually delete the entity to create an orphan
let query = format!("DELETE type::thing('knowledge_entity', '{}')", entity.id);
db.client.query(query).await.expect("delete entity");
// Now search
let results = KnowledgeEntity::vector_search(3, vec![0.1, 0.2, 0.3], &db, &user_id)
.await
.expect("search should succeed even with orphans");
assert!(
results.is_empty(),
"Should return empty result for orphan, got: {:?}",
results
);
}
}

View File

@@ -44,6 +44,9 @@ impl TextChunk {
source_id: &str,
db_client: &SurrealDbClient,
) -> Result<(), AppError> {
// Delete embeddings first
TextChunkEmbedding::delete_by_source_id(source_id, db_client).await?;
let query = format!(
"DELETE {} WHERE source_id = '{}'",
Self::table_name(),
@@ -102,7 +105,7 @@ impl TextChunk {
#[allow(clippy::missing_docs_in_private_items)]
#[derive(Deserialize)]
struct Row {
chunk_id: TextChunk,
chunk_id: Option<TextChunk>,
score: f32,
}
@@ -134,9 +137,11 @@ impl TextChunk {
Ok(rows
.into_iter()
.map(|r| TextChunkSearchResult {
chunk: r.chunk_id,
score: r.score,
.filter_map(|r| {
r.chunk_id.map(|chunk| TextChunkSearchResult {
chunk,
score: r.score,
})
})
.collect())
}

View File

@@ -102,44 +102,19 @@ impl TextChunkEmbedding {
/// Delete all embeddings that belong to chunks with a given `source_id`
///
/// This uses a subquery to the `text_chunk` table:
///
/// DELETE FROM text_chunk_embedding
/// WHERE chunk_id IN (SELECT id FROM text_chunk WHERE source_id = $source_id)
/// This uses the denormalized `source_id` on the embedding table.
pub async fn delete_by_source_id(
source_id: &str,
db: &SurrealDbClient,
) -> Result<(), AppError> {
#[allow(clippy::missing_docs_in_private_items)]
#[derive(Deserialize)]
struct IdRow {
id: RecordId,
}
let ids_query = format!(
"SELECT id FROM {} WHERE source_id = $source_id",
TextChunk::table_name()
);
let mut res = db
.client
.query(ids_query)
.bind(("source_id", source_id.to_owned()))
.await
.map_err(AppError::Database)?;
let ids: Vec<IdRow> = res.take(0).map_err(AppError::Database)?;
if ids.is_empty() {
return Ok(());
}
let delete_query = format!(
"DELETE FROM {} WHERE chunk_id IN $chunk_ids",
let query = format!(
"DELETE FROM {} WHERE source_id = $source_id",
Self::table_name()
);
db.client
.query(delete_query)
.bind((
"chunk_ids",
ids.into_iter().map(|row| row.id).collect::<Vec<_>>(),
))
.query(query)
.bind(("source_id", source_id.to_owned()))
.await
.map_err(AppError::Database)?
.check()

View File

@@ -120,79 +120,80 @@ where
}
pub fn build(self) -> Router<S> {
// Start with an empty router
let mut public_router = Router::new();
// Build the "App" router (Pages, API interactions, etc.)
let mut app_router = Router::new();
// Merge all public routers
// Merge all public routers (pages)
for router in self.public_routers {
public_router = public_router.merge(router);
app_router = app_router.merge(router);
}
// Add nested public routes
for (path, router) in self.nested_routes {
public_router = public_router.nest(&path, router);
app_router = app_router.nest(&path, router);
}
// Add public assets to public router
if let Some(assets_config) = self.public_assets_config {
// Call the macro using the stored relative directory path
let asset_service = create_asset_service!(&assets_config.directory);
// Nest the resulting service under the stored URL path
public_router = public_router.nest_service(&assets_config.path, asset_service);
}
// Start with an empty protected router
// Build protected router logic...
let mut protected_router = Router::new();
// Check if there are any protected routers
let has_protected_routes =
!self.protected_routers.is_empty() || !self.nested_protected_routes.is_empty();
// Merge root-level protected routers
for router in self.protected_routers {
protected_router = protected_router.merge(router);
}
// Nest protected routers
for (path, router) in self.nested_protected_routes {
protected_router = protected_router.nest(&path, router);
}
// Apply auth middleware
if has_protected_routes {
protected_router = protected_router
.route_layer(from_fn_with_state(self.app_state.clone(), require_auth));
}
// Combine public and protected routes
let mut router = Router::new().merge(public_router).merge(protected_router);
// Combine public and protected routes into the App router
app_router = app_router.merge(protected_router);
// Apply custom middleware in order they were added
// Apply custom middleware to the App router
for middleware_fn in self.custom_middleware {
router = middleware_fn(router);
app_router = middleware_fn(app_router);
}
// Apply common middleware
router = router.layer(from_fn_with_state(
// Apply App-specific Middleware (Analytics, Template, Auth, Session)
app_router = app_router.layer(from_fn_with_state(
self.app_state.clone(),
analytics_middleware::<HtmlState>,
));
router = router.layer(from_fn_with_state(
app_router = app_router.layer(from_fn_with_state(
self.app_state.clone(),
with_template_response::<HtmlState>,
));
router = router.layer(
app_router = app_router.layer(
AuthSessionLayer::<User, String, SessionSurrealPool<Any>, Surreal<Any>>::new(Some(
self.app_state.db.client.clone(),
))
.with_config(AuthConfig::<String>::default()),
);
router = router.layer(SessionLayer::new((*self.app_state.session_store).clone()));
app_router = app_router.layer(SessionLayer::new((*self.app_state.session_store).clone()));
if self.compression_enabled {
router = router.layer(compression_layer());
// Build the Final router, starting with assets (bypassing app middleware)
let mut final_router = Router::new();
if let Some(assets_config) = self.public_assets_config {
// Call the macro using the stored relative directory path
let asset_service = create_asset_service!(&assets_config.directory);
// Nest the resulting service under the stored URL path
final_router = final_router.nest_service(&assets_config.path, asset_service);
}
router
// Merge the App router
final_router = final_router.merge(app_router);
// Apply Global Middleware (Compression)
if self.compression_enabled {
final_router = final_router.layer(compression_layer());
}
final_router
}
}