mirror of
https://github.com/perstarkse/minne.git
synced 2026-02-21 15:47:39 +01:00
fix: edge case when deleting content
nit
This commit is contained in:
@@ -171,6 +171,9 @@ impl KnowledgeEntity {
|
||||
source_id: &str,
|
||||
db_client: &SurrealDbClient,
|
||||
) -> Result<(), AppError> {
|
||||
// Delete embeddings first, while we can still look them up via the entity's source_id
|
||||
KnowledgeEntityEmbedding::delete_by_source_id(source_id, db_client).await?;
|
||||
|
||||
let query = format!(
|
||||
"DELETE {} WHERE source_id = '{}'",
|
||||
Self::table_name(),
|
||||
@@ -224,7 +227,7 @@ impl KnowledgeEntity {
|
||||
) -> Result<Vec<KnowledgeEntityVectorResult>, AppError> {
|
||||
#[derive(Deserialize)]
|
||||
struct Row {
|
||||
entity_id: KnowledgeEntity,
|
||||
entity_id: Option<KnowledgeEntity>,
|
||||
score: f32,
|
||||
}
|
||||
|
||||
@@ -257,9 +260,11 @@ impl KnowledgeEntity {
|
||||
|
||||
Ok(rows
|
||||
.into_iter()
|
||||
.map(|r| KnowledgeEntityVectorResult {
|
||||
entity: r.entity_id,
|
||||
score: r.score,
|
||||
.filter_map(|r| {
|
||||
r.entity_id.map(|entity| KnowledgeEntityVectorResult {
|
||||
entity,
|
||||
score: r.score,
|
||||
})
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
@@ -914,4 +919,50 @@ mod tests {
|
||||
assert_eq!(results[0].entity.id, e2.id);
|
||||
assert_eq!(results[1].entity.id, e1.id);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_vector_search_with_orphaned_embedding() {
|
||||
let namespace = "test_ns_orphan";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
db.apply_migrations()
|
||||
.await
|
||||
.expect("Failed to apply migrations");
|
||||
|
||||
KnowledgeEntityEmbedding::redefine_hnsw_index(&db, 3)
|
||||
.await
|
||||
.expect("Failed to redefine index length");
|
||||
|
||||
let user_id = "user".to_string();
|
||||
let source_id = "src".to_string();
|
||||
let entity = KnowledgeEntity::new(
|
||||
source_id.clone(),
|
||||
"orphan".to_string(),
|
||||
"orphan desc".to_string(),
|
||||
KnowledgeEntityType::Document,
|
||||
None,
|
||||
user_id.clone(),
|
||||
);
|
||||
|
||||
KnowledgeEntity::store_with_embedding(entity.clone(), vec![0.1, 0.2, 0.3], &db)
|
||||
.await
|
||||
.expect("store entity with embedding");
|
||||
|
||||
// Manually delete the entity to create an orphan
|
||||
let query = format!("DELETE type::thing('knowledge_entity', '{}')", entity.id);
|
||||
db.client.query(query).await.expect("delete entity");
|
||||
|
||||
// Now search
|
||||
let results = KnowledgeEntity::vector_search(3, vec![0.1, 0.2, 0.3], &db, &user_id)
|
||||
.await
|
||||
.expect("search should succeed even with orphans");
|
||||
|
||||
assert!(
|
||||
results.is_empty(),
|
||||
"Should return empty result for orphan, got: {:?}",
|
||||
results
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,6 +44,9 @@ impl TextChunk {
|
||||
source_id: &str,
|
||||
db_client: &SurrealDbClient,
|
||||
) -> Result<(), AppError> {
|
||||
// Delete embeddings first
|
||||
TextChunkEmbedding::delete_by_source_id(source_id, db_client).await?;
|
||||
|
||||
let query = format!(
|
||||
"DELETE {} WHERE source_id = '{}'",
|
||||
Self::table_name(),
|
||||
@@ -102,7 +105,7 @@ impl TextChunk {
|
||||
#[allow(clippy::missing_docs_in_private_items)]
|
||||
#[derive(Deserialize)]
|
||||
struct Row {
|
||||
chunk_id: TextChunk,
|
||||
chunk_id: Option<TextChunk>,
|
||||
score: f32,
|
||||
}
|
||||
|
||||
@@ -134,9 +137,11 @@ impl TextChunk {
|
||||
|
||||
Ok(rows
|
||||
.into_iter()
|
||||
.map(|r| TextChunkSearchResult {
|
||||
chunk: r.chunk_id,
|
||||
score: r.score,
|
||||
.filter_map(|r| {
|
||||
r.chunk_id.map(|chunk| TextChunkSearchResult {
|
||||
chunk,
|
||||
score: r.score,
|
||||
})
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
@@ -102,44 +102,19 @@ impl TextChunkEmbedding {
|
||||
|
||||
/// Delete all embeddings that belong to chunks with a given `source_id`
|
||||
///
|
||||
/// This uses a subquery to the `text_chunk` table:
|
||||
///
|
||||
/// DELETE FROM text_chunk_embedding
|
||||
/// WHERE chunk_id IN (SELECT id FROM text_chunk WHERE source_id = $source_id)
|
||||
/// This uses the denormalized `source_id` on the embedding table.
|
||||
pub async fn delete_by_source_id(
|
||||
source_id: &str,
|
||||
db: &SurrealDbClient,
|
||||
) -> Result<(), AppError> {
|
||||
#[allow(clippy::missing_docs_in_private_items)]
|
||||
#[derive(Deserialize)]
|
||||
struct IdRow {
|
||||
id: RecordId,
|
||||
}
|
||||
let ids_query = format!(
|
||||
"SELECT id FROM {} WHERE source_id = $source_id",
|
||||
TextChunk::table_name()
|
||||
);
|
||||
let mut res = db
|
||||
.client
|
||||
.query(ids_query)
|
||||
.bind(("source_id", source_id.to_owned()))
|
||||
.await
|
||||
.map_err(AppError::Database)?;
|
||||
let ids: Vec<IdRow> = res.take(0).map_err(AppError::Database)?;
|
||||
|
||||
if ids.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
let delete_query = format!(
|
||||
"DELETE FROM {} WHERE chunk_id IN $chunk_ids",
|
||||
let query = format!(
|
||||
"DELETE FROM {} WHERE source_id = $source_id",
|
||||
Self::table_name()
|
||||
);
|
||||
|
||||
db.client
|
||||
.query(delete_query)
|
||||
.bind((
|
||||
"chunk_ids",
|
||||
ids.into_iter().map(|row| row.id).collect::<Vec<_>>(),
|
||||
))
|
||||
.query(query)
|
||||
.bind(("source_id", source_id.to_owned()))
|
||||
.await
|
||||
.map_err(AppError::Database)?
|
||||
.check()
|
||||
|
||||
@@ -120,79 +120,80 @@ where
|
||||
}
|
||||
|
||||
pub fn build(self) -> Router<S> {
|
||||
// Start with an empty router
|
||||
let mut public_router = Router::new();
|
||||
// Build the "App" router (Pages, API interactions, etc.)
|
||||
let mut app_router = Router::new();
|
||||
|
||||
// Merge all public routers
|
||||
// Merge all public routers (pages)
|
||||
for router in self.public_routers {
|
||||
public_router = public_router.merge(router);
|
||||
app_router = app_router.merge(router);
|
||||
}
|
||||
|
||||
// Add nested public routes
|
||||
for (path, router) in self.nested_routes {
|
||||
public_router = public_router.nest(&path, router);
|
||||
app_router = app_router.nest(&path, router);
|
||||
}
|
||||
|
||||
// Add public assets to public router
|
||||
if let Some(assets_config) = self.public_assets_config {
|
||||
// Call the macro using the stored relative directory path
|
||||
let asset_service = create_asset_service!(&assets_config.directory);
|
||||
// Nest the resulting service under the stored URL path
|
||||
public_router = public_router.nest_service(&assets_config.path, asset_service);
|
||||
}
|
||||
|
||||
// Start with an empty protected router
|
||||
// Build protected router logic...
|
||||
let mut protected_router = Router::new();
|
||||
|
||||
// Check if there are any protected routers
|
||||
let has_protected_routes =
|
||||
!self.protected_routers.is_empty() || !self.nested_protected_routes.is_empty();
|
||||
|
||||
// Merge root-level protected routers
|
||||
for router in self.protected_routers {
|
||||
protected_router = protected_router.merge(router);
|
||||
}
|
||||
|
||||
// Nest protected routers
|
||||
for (path, router) in self.nested_protected_routes {
|
||||
protected_router = protected_router.nest(&path, router);
|
||||
}
|
||||
|
||||
// Apply auth middleware
|
||||
if has_protected_routes {
|
||||
protected_router = protected_router
|
||||
.route_layer(from_fn_with_state(self.app_state.clone(), require_auth));
|
||||
}
|
||||
|
||||
// Combine public and protected routes
|
||||
let mut router = Router::new().merge(public_router).merge(protected_router);
|
||||
// Combine public and protected routes into the App router
|
||||
app_router = app_router.merge(protected_router);
|
||||
|
||||
// Apply custom middleware in order they were added
|
||||
// Apply custom middleware to the App router
|
||||
for middleware_fn in self.custom_middleware {
|
||||
router = middleware_fn(router);
|
||||
app_router = middleware_fn(app_router);
|
||||
}
|
||||
|
||||
// Apply common middleware
|
||||
router = router.layer(from_fn_with_state(
|
||||
// Apply App-specific Middleware (Analytics, Template, Auth, Session)
|
||||
app_router = app_router.layer(from_fn_with_state(
|
||||
self.app_state.clone(),
|
||||
analytics_middleware::<HtmlState>,
|
||||
));
|
||||
router = router.layer(from_fn_with_state(
|
||||
app_router = app_router.layer(from_fn_with_state(
|
||||
self.app_state.clone(),
|
||||
with_template_response::<HtmlState>,
|
||||
));
|
||||
router = router.layer(
|
||||
app_router = app_router.layer(
|
||||
AuthSessionLayer::<User, String, SessionSurrealPool<Any>, Surreal<Any>>::new(Some(
|
||||
self.app_state.db.client.clone(),
|
||||
))
|
||||
.with_config(AuthConfig::<String>::default()),
|
||||
);
|
||||
router = router.layer(SessionLayer::new((*self.app_state.session_store).clone()));
|
||||
app_router = app_router.layer(SessionLayer::new((*self.app_state.session_store).clone()));
|
||||
|
||||
if self.compression_enabled {
|
||||
router = router.layer(compression_layer());
|
||||
// Build the Final router, starting with assets (bypassing app middleware)
|
||||
let mut final_router = Router::new();
|
||||
|
||||
if let Some(assets_config) = self.public_assets_config {
|
||||
// Call the macro using the stored relative directory path
|
||||
let asset_service = create_asset_service!(&assets_config.directory);
|
||||
// Nest the resulting service under the stored URL path
|
||||
final_router = final_router.nest_service(&assets_config.path, asset_service);
|
||||
}
|
||||
|
||||
router
|
||||
// Merge the App router
|
||||
final_router = final_router.merge(app_router);
|
||||
|
||||
// Apply Global Middleware (Compression)
|
||||
if self.compression_enabled {
|
||||
final_router = final_router.layer(compression_layer());
|
||||
}
|
||||
|
||||
final_router
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user