mirror of
https://github.com/perstarkse/minne.git
synced 2026-04-10 19:26:53 +02:00
test: minio to devenv, improved testing s3 and relationships
This commit is contained in:
@@ -281,6 +281,33 @@ pub mod testing {
|
||||
use crate::utils::config::{AppConfig, PdfIngestMode};
|
||||
use uuid;
|
||||
|
||||
const DEFAULT_TEST_S3_BUCKET: &str = "minne-tests";
|
||||
const DEFAULT_TEST_S3_ENDPOINT: &str = "http://127.0.0.1:19000";
|
||||
|
||||
fn configured_test_s3_bucket() -> String {
|
||||
std::env::var("MINNE_TEST_S3_BUCKET")
|
||||
.ok()
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
.or_else(|| {
|
||||
std::env::var("S3_BUCKET")
|
||||
.ok()
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
})
|
||||
.unwrap_or_else(|| DEFAULT_TEST_S3_BUCKET.to_string())
|
||||
}
|
||||
|
||||
fn configured_test_s3_endpoint() -> String {
|
||||
std::env::var("MINNE_TEST_S3_ENDPOINT")
|
||||
.ok()
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
.or_else(|| {
|
||||
std::env::var("S3_ENDPOINT")
|
||||
.ok()
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
})
|
||||
.unwrap_or_else(|| DEFAULT_TEST_S3_ENDPOINT.to_string())
|
||||
}
|
||||
|
||||
/// Create a test configuration with memory storage.
|
||||
///
|
||||
/// This provides a ready-to-use configuration for testing scenarios
|
||||
@@ -326,7 +353,8 @@ pub mod testing {
|
||||
|
||||
/// Create a test configuration with S3 storage (MinIO).
|
||||
///
|
||||
/// This requires a running MinIO instance on localhost:9000.
|
||||
/// Uses `MINNE_TEST_S3_ENDPOINT` / `S3_ENDPOINT` and
|
||||
/// `MINNE_TEST_S3_BUCKET` / `S3_BUCKET` when provided.
|
||||
pub fn test_config_s3() -> AppConfig {
|
||||
AppConfig {
|
||||
openai_api_key: "test".into(),
|
||||
@@ -339,8 +367,8 @@ pub mod testing {
|
||||
http_port: 0,
|
||||
openai_base_url: "..".into(),
|
||||
storage: StorageKind::S3,
|
||||
s3_bucket: Some("minne-tests".into()),
|
||||
s3_endpoint: Some("http://localhost:9000".into()),
|
||||
s3_bucket: Some(configured_test_s3_bucket()),
|
||||
s3_endpoint: Some(configured_test_s3_endpoint()),
|
||||
s3_region: Some("us-east-1".into()),
|
||||
pdf_ingest_mode: PdfIngestMode::LlmFirst,
|
||||
..Default::default()
|
||||
@@ -391,8 +419,7 @@ pub mod testing {
|
||||
|
||||
/// Create a new TestStorageManager with S3 backend (MinIO).
|
||||
///
|
||||
/// This requires a running MinIO instance on localhost:9000 with
|
||||
/// default credentials (minioadmin/minioadmin) and a 'minne-tests' bucket.
|
||||
/// This requires a reachable MinIO endpoint and an existing test bucket.
|
||||
pub async fn new_s3() -> object_store::Result<Self> {
|
||||
// Ensure credentials are set for MinIO
|
||||
// We set these env vars for the process, which AmazonS3Builder will pick up
|
||||
@@ -403,6 +430,11 @@ pub mod testing {
|
||||
let cfg = test_config_s3();
|
||||
let storage = StorageManager::new(&cfg).await?;
|
||||
|
||||
// Probe the bucket so tests can cleanly skip when the endpoint is unreachable
|
||||
// or the test bucket is not provisioned.
|
||||
let probe_prefix = format!("__minne_s3_probe__/{}", uuid::Uuid::new_v4());
|
||||
storage.list(Some(&probe_prefix)).await?;
|
||||
|
||||
Ok(Self {
|
||||
storage,
|
||||
_temp_dir: None,
|
||||
@@ -923,8 +955,8 @@ mod tests {
|
||||
assert_eq!(*test_storage.storage().backend_kind(), StorageKind::Memory);
|
||||
}
|
||||
|
||||
// S3 Tests - Require MinIO on localhost:9000 with bucket 'minne-tests'
|
||||
// These tests will fail if MinIO is not running or bucket doesn't exist.
|
||||
// S3 Tests - Require a reachable MinIO endpoint and test bucket.
|
||||
// `TestStorageManager::new_s3()` probes connectivity and these tests auto-skip when unavailable.
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_storage_manager_s3_basic_operations() {
|
||||
|
||||
@@ -72,7 +72,7 @@ impl KnowledgeRelationship {
|
||||
) -> Result<(), AppError> {
|
||||
db_client
|
||||
.client
|
||||
.query("DELETE knowledge_entity -> relates_to WHERE metadata.source_id = $source_id")
|
||||
.query("DELETE FROM relates_to WHERE metadata.source_id = $source_id")
|
||||
.bind(("source_id", source_id.to_owned()))
|
||||
.await?
|
||||
.check()?;
|
||||
@@ -127,6 +127,34 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::storage::types::knowledge_entity::{KnowledgeEntity, KnowledgeEntityType};
|
||||
|
||||
async fn setup_test_db() -> SurrealDbClient {
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
|
||||
db.apply_migrations()
|
||||
.await
|
||||
.expect("Failed to apply migrations");
|
||||
|
||||
db
|
||||
}
|
||||
|
||||
async fn get_relationship_by_id(
|
||||
relationship_id: &str,
|
||||
db_client: &SurrealDbClient,
|
||||
) -> Option<KnowledgeRelationship> {
|
||||
let mut result = db_client
|
||||
.client
|
||||
.query("SELECT * FROM type::thing('relates_to', $id)")
|
||||
.bind(("id", relationship_id.to_owned()))
|
||||
.await
|
||||
.expect("relationship query by id failed");
|
||||
|
||||
result.take(0).expect("failed to take relationship by id")
|
||||
}
|
||||
|
||||
// Helper function to create a test knowledge entity for the relationship tests
|
||||
async fn create_test_entity(name: &str, db_client: &SurrealDbClient) -> String {
|
||||
let source_id = "source123".to_string();
|
||||
@@ -178,15 +206,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_store_and_verify_by_source_id() {
|
||||
// Setup in-memory database for testing
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
|
||||
db.apply_migrations()
|
||||
.await
|
||||
.expect("Failed to apply migrations");
|
||||
let db = setup_test_db().await;
|
||||
|
||||
// Create two entities to relate
|
||||
let entity1_id = create_test_entity("Entity 1", &db).await;
|
||||
@@ -211,33 +231,33 @@ mod tests {
|
||||
.await
|
||||
.expect("Failed to store relationship");
|
||||
|
||||
let persisted = get_relationship_by_id(&relationship.id, &db)
|
||||
.await
|
||||
.expect("Relationship should be retrievable by id");
|
||||
assert_eq!(persisted.in_, entity1_id);
|
||||
assert_eq!(persisted.out, entity2_id);
|
||||
assert_eq!(persisted.metadata.user_id, user_id);
|
||||
assert_eq!(persisted.metadata.source_id, source_id);
|
||||
|
||||
// Query to verify the relationship exists by checking for relationships with our source_id
|
||||
// This approach is more reliable than trying to look up by ID
|
||||
let check_query = format!(
|
||||
"SELECT * FROM relates_to WHERE metadata.source_id = '{}'",
|
||||
source_id
|
||||
);
|
||||
let mut check_result = db.query(check_query).await.expect("Check query failed");
|
||||
let mut check_result = db
|
||||
.query("SELECT * FROM relates_to WHERE metadata.source_id = $source_id")
|
||||
.bind(("source_id", source_id.clone()))
|
||||
.await
|
||||
.expect("Check query failed");
|
||||
let check_results: Vec<KnowledgeRelationship> = check_result.take(0).unwrap_or_default();
|
||||
|
||||
// Just verify that a relationship was created
|
||||
assert!(
|
||||
!check_results.is_empty(),
|
||||
"Relationship should exist in the database"
|
||||
assert_eq!(
|
||||
check_results.len(),
|
||||
1,
|
||||
"Expected one relationship for source_id"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_store_relationship_resists_query_injection() {
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
|
||||
db.apply_migrations()
|
||||
.await
|
||||
.expect("Failed to apply migrations");
|
||||
let db = setup_test_db().await;
|
||||
|
||||
let entity1_id = create_test_entity("Entity 1", &db).await;
|
||||
let entity2_id = create_test_entity("Entity 2", &db).await;
|
||||
@@ -273,11 +293,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_store_and_delete_relationship() {
|
||||
// Setup in-memory database for testing
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
let db = setup_test_db().await;
|
||||
|
||||
// Create two entities to relate
|
||||
let entity1_id = create_test_entity("Entity 1", &db).await;
|
||||
@@ -338,11 +354,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete_relationship_by_id_unauthorized() {
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
let db = setup_test_db().await;
|
||||
|
||||
let entity1_id = create_test_entity("Entity 1", &db).await;
|
||||
let entity2_id = create_test_entity("Entity 2", &db).await;
|
||||
@@ -406,11 +418,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_store_relationship_exists() {
|
||||
// Setup in-memory database for testing
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
let db = setup_test_db().await;
|
||||
|
||||
// Create entities to relate
|
||||
let entity1_id = create_test_entity("Entity 1", &db).await;
|
||||
@@ -462,49 +470,87 @@ mod tests {
|
||||
.await
|
||||
.expect("Failed to store different relationship");
|
||||
|
||||
// Sanity-check setup: exactly two relationships use source_id and one uses different_source_id.
|
||||
let mut before_delete = db
|
||||
.query("SELECT * FROM relates_to WHERE metadata.source_id = $source_id")
|
||||
.bind(("source_id", source_id.clone()))
|
||||
.await
|
||||
.expect("before delete query failed");
|
||||
let before_delete_rows: Vec<KnowledgeRelationship> =
|
||||
before_delete.take(0).unwrap_or_default();
|
||||
assert_eq!(before_delete_rows.len(), 2);
|
||||
|
||||
let mut before_delete_different = db
|
||||
.query("SELECT * FROM relates_to WHERE metadata.source_id = $source_id")
|
||||
.bind(("source_id", different_source_id.clone()))
|
||||
.await
|
||||
.expect("before delete different query failed");
|
||||
let before_delete_different_rows: Vec<KnowledgeRelationship> =
|
||||
before_delete_different.take(0).unwrap_or_default();
|
||||
assert_eq!(before_delete_different_rows.len(), 1);
|
||||
|
||||
// Delete relationships by source_id
|
||||
KnowledgeRelationship::delete_relationships_by_source_id(&source_id, &db)
|
||||
.await
|
||||
.expect("Failed to delete relationships by source_id");
|
||||
|
||||
// Query to verify the relationships with source_id were deleted
|
||||
let query1 = format!("SELECT * FROM relates_to WHERE id = '{}'", relationship1.id);
|
||||
let query2 = format!("SELECT * FROM relates_to WHERE id = '{}'", relationship2.id);
|
||||
let different_query = format!(
|
||||
"SELECT * FROM relates_to WHERE id = '{}'",
|
||||
different_relationship.id
|
||||
);
|
||||
|
||||
let mut result1 = db.query(query1).await.expect("Query 1 failed");
|
||||
let results1: Vec<KnowledgeRelationship> = result1.take(0).unwrap_or_default();
|
||||
|
||||
let mut result2 = db.query(query2).await.expect("Query 2 failed");
|
||||
let results2: Vec<KnowledgeRelationship> = result2.take(0).unwrap_or_default();
|
||||
|
||||
let mut different_result = db
|
||||
.query(different_query)
|
||||
.await
|
||||
.expect("Different query failed");
|
||||
let _different_results: Vec<KnowledgeRelationship> =
|
||||
different_result.take(0).unwrap_or_default();
|
||||
// Query to verify the specific relationships with source_id were deleted.
|
||||
let result1 = get_relationship_by_id(&relationship1.id, &db).await;
|
||||
let result2 = get_relationship_by_id(&relationship2.id, &db).await;
|
||||
let different_result = get_relationship_by_id(&different_relationship.id, &db).await;
|
||||
|
||||
// Verify relationships with the source_id are deleted
|
||||
assert!(results1.is_empty(), "Relationship 1 should be deleted");
|
||||
assert!(results2.is_empty(), "Relationship 2 should be deleted");
|
||||
assert!(result1.is_none(), "Relationship 1 should be deleted");
|
||||
assert!(result2.is_none(), "Relationship 2 should be deleted");
|
||||
let remaining =
|
||||
different_result.expect("Relationship with different source_id should remain");
|
||||
assert_eq!(remaining.metadata.source_id, different_source_id);
|
||||
}
|
||||
|
||||
// For the relationship with different source ID, we need to check differently
|
||||
// Let's just verify we have a relationship where the source_id matches different_source_id
|
||||
let check_query = format!(
|
||||
"SELECT * FROM relates_to WHERE metadata.source_id = '{}'",
|
||||
different_source_id
|
||||
#[tokio::test]
|
||||
async fn test_delete_relationships_by_source_id_resists_query_injection() {
|
||||
let db = setup_test_db().await;
|
||||
|
||||
let entity1_id = create_test_entity("Entity 1", &db).await;
|
||||
let entity2_id = create_test_entity("Entity 2", &db).await;
|
||||
let entity3_id = create_test_entity("Entity 3", &db).await;
|
||||
|
||||
let safe_relationship = KnowledgeRelationship::new(
|
||||
entity1_id.clone(),
|
||||
entity2_id.clone(),
|
||||
"user123".to_string(),
|
||||
"safe_source".to_string(),
|
||||
"references".to_string(),
|
||||
);
|
||||
let mut check_result = db.query(check_query).await.expect("Check query failed");
|
||||
let check_results: Vec<KnowledgeRelationship> = check_result.take(0).unwrap_or_default();
|
||||
|
||||
// Verify the relationship with a different source_id still exists
|
||||
let other_relationship = KnowledgeRelationship::new(
|
||||
entity2_id,
|
||||
entity3_id,
|
||||
"user123".to_string(),
|
||||
"other_source".to_string(),
|
||||
"contains".to_string(),
|
||||
);
|
||||
|
||||
safe_relationship
|
||||
.store_relationship(&db)
|
||||
.await
|
||||
.expect("store safe relationship");
|
||||
other_relationship
|
||||
.store_relationship(&db)
|
||||
.await
|
||||
.expect("store other relationship");
|
||||
|
||||
KnowledgeRelationship::delete_relationships_by_source_id("safe_source' OR 1=1 --", &db)
|
||||
.await
|
||||
.expect("delete call should succeed");
|
||||
|
||||
let remaining_safe = get_relationship_by_id(&safe_relationship.id, &db).await;
|
||||
let remaining_other = get_relationship_by_id(&other_relationship.id, &db).await;
|
||||
|
||||
assert!(remaining_safe.is_some(), "Safe relationship should remain");
|
||||
assert!(
|
||||
!check_results.is_empty(),
|
||||
"Relationship with different source_id should still exist"
|
||||
remaining_other.is_some(),
|
||||
"Other relationship should remain"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user