mirror of
https://github.com/perstarkse/minne.git
synced 2026-07-01 02:21:34 +02:00
fix: edge case when deleting content
nit
This commit is contained in:
@@ -171,6 +171,9 @@ impl KnowledgeEntity {
|
|||||||
source_id: &str,
|
source_id: &str,
|
||||||
db_client: &SurrealDbClient,
|
db_client: &SurrealDbClient,
|
||||||
) -> Result<(), AppError> {
|
) -> Result<(), AppError> {
|
||||||
|
// Delete embeddings first, while we can still look them up via the entity's source_id
|
||||||
|
KnowledgeEntityEmbedding::delete_by_source_id(source_id, db_client).await?;
|
||||||
|
|
||||||
let query = format!(
|
let query = format!(
|
||||||
"DELETE {} WHERE source_id = '{}'",
|
"DELETE {} WHERE source_id = '{}'",
|
||||||
Self::table_name(),
|
Self::table_name(),
|
||||||
@@ -224,7 +227,7 @@ impl KnowledgeEntity {
|
|||||||
) -> Result<Vec<KnowledgeEntityVectorResult>, AppError> {
|
) -> Result<Vec<KnowledgeEntityVectorResult>, AppError> {
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
struct Row {
|
struct Row {
|
||||||
entity_id: KnowledgeEntity,
|
entity_id: Option<KnowledgeEntity>,
|
||||||
score: f32,
|
score: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -257,9 +260,11 @@ impl KnowledgeEntity {
|
|||||||
|
|
||||||
Ok(rows
|
Ok(rows
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|r| KnowledgeEntityVectorResult {
|
.filter_map(|r| {
|
||||||
entity: r.entity_id,
|
r.entity_id.map(|entity| KnowledgeEntityVectorResult {
|
||||||
score: r.score,
|
entity,
|
||||||
|
score: r.score,
|
||||||
|
})
|
||||||
})
|
})
|
||||||
.collect())
|
.collect())
|
||||||
}
|
}
|
||||||
@@ -914,4 +919,50 @@ mod tests {
|
|||||||
assert_eq!(results[0].entity.id, e2.id);
|
assert_eq!(results[0].entity.id, e2.id);
|
||||||
assert_eq!(results[1].entity.id, e1.id);
|
assert_eq!(results[1].entity.id, e1.id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_vector_search_with_orphaned_embedding() {
|
||||||
|
let namespace = "test_ns_orphan";
|
||||||
|
let database = &Uuid::new_v4().to_string();
|
||||||
|
let db = SurrealDbClient::memory(namespace, database)
|
||||||
|
.await
|
||||||
|
.expect("Failed to start in-memory surrealdb");
|
||||||
|
db.apply_migrations()
|
||||||
|
.await
|
||||||
|
.expect("Failed to apply migrations");
|
||||||
|
|
||||||
|
KnowledgeEntityEmbedding::redefine_hnsw_index(&db, 3)
|
||||||
|
.await
|
||||||
|
.expect("Failed to redefine index length");
|
||||||
|
|
||||||
|
let user_id = "user".to_string();
|
||||||
|
let source_id = "src".to_string();
|
||||||
|
let entity = KnowledgeEntity::new(
|
||||||
|
source_id.clone(),
|
||||||
|
"orphan".to_string(),
|
||||||
|
"orphan desc".to_string(),
|
||||||
|
KnowledgeEntityType::Document,
|
||||||
|
None,
|
||||||
|
user_id.clone(),
|
||||||
|
);
|
||||||
|
|
||||||
|
KnowledgeEntity::store_with_embedding(entity.clone(), vec![0.1, 0.2, 0.3], &db)
|
||||||
|
.await
|
||||||
|
.expect("store entity with embedding");
|
||||||
|
|
||||||
|
// Manually delete the entity to create an orphan
|
||||||
|
let query = format!("DELETE type::thing('knowledge_entity', '{}')", entity.id);
|
||||||
|
db.client.query(query).await.expect("delete entity");
|
||||||
|
|
||||||
|
// Now search
|
||||||
|
let results = KnowledgeEntity::vector_search(3, vec![0.1, 0.2, 0.3], &db, &user_id)
|
||||||
|
.await
|
||||||
|
.expect("search should succeed even with orphans");
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
results.is_empty(),
|
||||||
|
"Should return empty result for orphan, got: {:?}",
|
||||||
|
results
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -44,6 +44,9 @@ impl TextChunk {
|
|||||||
source_id: &str,
|
source_id: &str,
|
||||||
db_client: &SurrealDbClient,
|
db_client: &SurrealDbClient,
|
||||||
) -> Result<(), AppError> {
|
) -> Result<(), AppError> {
|
||||||
|
// Delete embeddings first
|
||||||
|
TextChunkEmbedding::delete_by_source_id(source_id, db_client).await?;
|
||||||
|
|
||||||
let query = format!(
|
let query = format!(
|
||||||
"DELETE {} WHERE source_id = '{}'",
|
"DELETE {} WHERE source_id = '{}'",
|
||||||
Self::table_name(),
|
Self::table_name(),
|
||||||
@@ -102,7 +105,7 @@ impl TextChunk {
|
|||||||
#[allow(clippy::missing_docs_in_private_items)]
|
#[allow(clippy::missing_docs_in_private_items)]
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
struct Row {
|
struct Row {
|
||||||
chunk_id: TextChunk,
|
chunk_id: Option<TextChunk>,
|
||||||
score: f32,
|
score: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -134,9 +137,11 @@ impl TextChunk {
|
|||||||
|
|
||||||
Ok(rows
|
Ok(rows
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|r| TextChunkSearchResult {
|
.filter_map(|r| {
|
||||||
chunk: r.chunk_id,
|
r.chunk_id.map(|chunk| TextChunkSearchResult {
|
||||||
score: r.score,
|
chunk,
|
||||||
|
score: r.score,
|
||||||
|
})
|
||||||
})
|
})
|
||||||
.collect())
|
.collect())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -102,44 +102,19 @@ impl TextChunkEmbedding {
|
|||||||
|
|
||||||
/// Delete all embeddings that belong to chunks with a given `source_id`
|
/// Delete all embeddings that belong to chunks with a given `source_id`
|
||||||
///
|
///
|
||||||
/// This uses a subquery to the `text_chunk` table:
|
/// This uses the denormalized `source_id` on the embedding table.
|
||||||
///
|
|
||||||
/// DELETE FROM text_chunk_embedding
|
|
||||||
/// WHERE chunk_id IN (SELECT id FROM text_chunk WHERE source_id = $source_id)
|
|
||||||
pub async fn delete_by_source_id(
|
pub async fn delete_by_source_id(
|
||||||
source_id: &str,
|
source_id: &str,
|
||||||
db: &SurrealDbClient,
|
db: &SurrealDbClient,
|
||||||
) -> Result<(), AppError> {
|
) -> Result<(), AppError> {
|
||||||
#[allow(clippy::missing_docs_in_private_items)]
|
let query = format!(
|
||||||
#[derive(Deserialize)]
|
"DELETE FROM {} WHERE source_id = $source_id",
|
||||||
struct IdRow {
|
|
||||||
id: RecordId,
|
|
||||||
}
|
|
||||||
let ids_query = format!(
|
|
||||||
"SELECT id FROM {} WHERE source_id = $source_id",
|
|
||||||
TextChunk::table_name()
|
|
||||||
);
|
|
||||||
let mut res = db
|
|
||||||
.client
|
|
||||||
.query(ids_query)
|
|
||||||
.bind(("source_id", source_id.to_owned()))
|
|
||||||
.await
|
|
||||||
.map_err(AppError::Database)?;
|
|
||||||
let ids: Vec<IdRow> = res.take(0).map_err(AppError::Database)?;
|
|
||||||
|
|
||||||
if ids.is_empty() {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
let delete_query = format!(
|
|
||||||
"DELETE FROM {} WHERE chunk_id IN $chunk_ids",
|
|
||||||
Self::table_name()
|
Self::table_name()
|
||||||
);
|
);
|
||||||
|
|
||||||
db.client
|
db.client
|
||||||
.query(delete_query)
|
.query(query)
|
||||||
.bind((
|
.bind(("source_id", source_id.to_owned()))
|
||||||
"chunk_ids",
|
|
||||||
ids.into_iter().map(|row| row.id).collect::<Vec<_>>(),
|
|
||||||
))
|
|
||||||
.await
|
.await
|
||||||
.map_err(AppError::Database)?
|
.map_err(AppError::Database)?
|
||||||
.check()
|
.check()
|
||||||
|
|||||||
@@ -120,79 +120,80 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn build(self) -> Router<S> {
|
pub fn build(self) -> Router<S> {
|
||||||
// Start with an empty router
|
// Build the "App" router (Pages, API interactions, etc.)
|
||||||
let mut public_router = Router::new();
|
let mut app_router = Router::new();
|
||||||
|
|
||||||
// Merge all public routers
|
// Merge all public routers (pages)
|
||||||
for router in self.public_routers {
|
for router in self.public_routers {
|
||||||
public_router = public_router.merge(router);
|
app_router = app_router.merge(router);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add nested public routes
|
// Add nested public routes
|
||||||
for (path, router) in self.nested_routes {
|
for (path, router) in self.nested_routes {
|
||||||
public_router = public_router.nest(&path, router);
|
app_router = app_router.nest(&path, router);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add public assets to public router
|
// Build protected router logic...
|
||||||
if let Some(assets_config) = self.public_assets_config {
|
|
||||||
// Call the macro using the stored relative directory path
|
|
||||||
let asset_service = create_asset_service!(&assets_config.directory);
|
|
||||||
// Nest the resulting service under the stored URL path
|
|
||||||
public_router = public_router.nest_service(&assets_config.path, asset_service);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start with an empty protected router
|
|
||||||
let mut protected_router = Router::new();
|
let mut protected_router = Router::new();
|
||||||
|
|
||||||
// Check if there are any protected routers
|
|
||||||
let has_protected_routes =
|
let has_protected_routes =
|
||||||
!self.protected_routers.is_empty() || !self.nested_protected_routes.is_empty();
|
!self.protected_routers.is_empty() || !self.nested_protected_routes.is_empty();
|
||||||
|
|
||||||
// Merge root-level protected routers
|
|
||||||
for router in self.protected_routers {
|
for router in self.protected_routers {
|
||||||
protected_router = protected_router.merge(router);
|
protected_router = protected_router.merge(router);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Nest protected routers
|
|
||||||
for (path, router) in self.nested_protected_routes {
|
for (path, router) in self.nested_protected_routes {
|
||||||
protected_router = protected_router.nest(&path, router);
|
protected_router = protected_router.nest(&path, router);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply auth middleware
|
|
||||||
if has_protected_routes {
|
if has_protected_routes {
|
||||||
protected_router = protected_router
|
protected_router = protected_router
|
||||||
.route_layer(from_fn_with_state(self.app_state.clone(), require_auth));
|
.route_layer(from_fn_with_state(self.app_state.clone(), require_auth));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Combine public and protected routes
|
// Combine public and protected routes into the App router
|
||||||
let mut router = Router::new().merge(public_router).merge(protected_router);
|
app_router = app_router.merge(protected_router);
|
||||||
|
|
||||||
// Apply custom middleware in order they were added
|
// Apply custom middleware to the App router
|
||||||
for middleware_fn in self.custom_middleware {
|
for middleware_fn in self.custom_middleware {
|
||||||
router = middleware_fn(router);
|
app_router = middleware_fn(app_router);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply common middleware
|
// Apply App-specific Middleware (Analytics, Template, Auth, Session)
|
||||||
router = router.layer(from_fn_with_state(
|
app_router = app_router.layer(from_fn_with_state(
|
||||||
self.app_state.clone(),
|
self.app_state.clone(),
|
||||||
analytics_middleware::<HtmlState>,
|
analytics_middleware::<HtmlState>,
|
||||||
));
|
));
|
||||||
router = router.layer(from_fn_with_state(
|
app_router = app_router.layer(from_fn_with_state(
|
||||||
self.app_state.clone(),
|
self.app_state.clone(),
|
||||||
with_template_response::<HtmlState>,
|
with_template_response::<HtmlState>,
|
||||||
));
|
));
|
||||||
router = router.layer(
|
app_router = app_router.layer(
|
||||||
AuthSessionLayer::<User, String, SessionSurrealPool<Any>, Surreal<Any>>::new(Some(
|
AuthSessionLayer::<User, String, SessionSurrealPool<Any>, Surreal<Any>>::new(Some(
|
||||||
self.app_state.db.client.clone(),
|
self.app_state.db.client.clone(),
|
||||||
))
|
))
|
||||||
.with_config(AuthConfig::<String>::default()),
|
.with_config(AuthConfig::<String>::default()),
|
||||||
);
|
);
|
||||||
router = router.layer(SessionLayer::new((*self.app_state.session_store).clone()));
|
app_router = app_router.layer(SessionLayer::new((*self.app_state.session_store).clone()));
|
||||||
|
|
||||||
if self.compression_enabled {
|
// Build the Final router, starting with assets (bypassing app middleware)
|
||||||
router = router.layer(compression_layer());
|
let mut final_router = Router::new();
|
||||||
|
|
||||||
|
if let Some(assets_config) = self.public_assets_config {
|
||||||
|
// Call the macro using the stored relative directory path
|
||||||
|
let asset_service = create_asset_service!(&assets_config.directory);
|
||||||
|
// Nest the resulting service under the stored URL path
|
||||||
|
final_router = final_router.nest_service(&assets_config.path, asset_service);
|
||||||
}
|
}
|
||||||
|
|
||||||
router
|
// Merge the App router
|
||||||
|
final_router = final_router.merge(app_router);
|
||||||
|
|
||||||
|
// Apply Global Middleware (Compression)
|
||||||
|
if self.compression_enabled {
|
||||||
|
final_router = final_router.layer(compression_layer());
|
||||||
|
}
|
||||||
|
|
||||||
|
final_router
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user