From 93d11b66eb4e98cd5902fe7daa9e95a8c25f8de4 Mon Sep 17 00:00:00 2001
From: Per Stark
Date: Fri, 29 May 2026 11:57:39 +0200
Subject: [PATCH] test: cover system settings sync, validation, and ingestion prompts

Add tests for embedding provider sync, patch isolation, typed backend
serde, and DB-backed ingestion prompts.

Coverage added by this patch:

* common/src/storage/types/system_settings.rs
  - SystemSettingsPatch leaves fields it does not mention unchanged.
  - SystemSettings::update rejects a whitespace-only query model with
    AppError::Validation, resets a wrong record id to
    SystemSettings::RECORD_ID, and keeps the previously synced
    embedding backend when the caller passes a different one.
  - SystemSettings::sync_from_embedding_provider persists the provider's
    backend and dimensions, reports no change on a repeated sync, and
    returns AppError::Validation for a dimension above u32::MAX.
* common/src/utils/embedding.rs
  - EmbeddingBackend::as_str, serde (de)serialization and FromStr
    aliases agree on the lowercase names.
  - SystemSettings deserializes the typed embedding_backend field.
* ingestion-pipeline/src/pipeline/services.rs
  - prepare_llm_request uses the ingestion system prompt stored in the
    database as the system message.
---
 common/src/storage/types/system_settings.rs | 175 ++++++++++++++++++++
 common/src/utils/embedding.rs               |  69 ++++++++
 ingestion-pipeline/src/pipeline/services.rs |  81 +++++++++
 3 files changed, 325 insertions(+)

diff --git a/common/src/storage/types/system_settings.rs b/common/src/storage/types/system_settings.rs
index 8f74adf..ba5faf6 100644
--- a/common/src/storage/types/system_settings.rs
+++ b/common/src/storage/types/system_settings.rs
@@ -499,6 +499,181 @@ mod tests {
         Ok(())
     }
 
+    #[tokio::test]
+    async fn test_patch_leaves_unmentioned_fields_unchanged() -> anyhow::Result<()> {
+        let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string())
+            .await
+            .with_context(|| "Failed to start in-memory surrealdb".to_string())?;
+        db.apply_migrations()
+            .await
+            .with_context(|| "Failed to apply migrations".to_string())?;
+
+        let original = SystemSettings::get_current(&db)
+            .await
+            .with_context(|| "Failed to get system settings".to_string())?;
+        let sentinel = "custom-query-prompt-sentinel".to_string();
+
+        let patched = SystemSettingsPatch {
+            query_system_prompt: Some(sentinel.clone()),
+            ..Default::default()
+        }
+        .apply(&db)
+        .await
+        .with_context(|| "Failed to patch query prompt".to_string())?;
+
+        assert_eq!(patched.query_system_prompt, sentinel);
+        assert_eq!(patched.ingestion_system_prompt, original.ingestion_system_prompt);
+        assert_eq!(patched.query_model, original.query_model);
+        assert_eq!(
+            patched.registrations_enabled,
+            original.registrations_enabled
+        );
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_update_rejects_empty_model_name() -> anyhow::Result<()> {
+        let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string())
+            .await
+            .with_context(|| "Failed to start in-memory surrealdb".to_string())?;
+        db.apply_migrations()
+            .await
+            .with_context(|| "Failed to apply migrations".to_string())?;
+
+        let mut invalid_settings = SystemSettings::get_current(&db)
+            .await
+            .with_context(|| "Failed to get system settings".to_string())?;
+        invalid_settings.query_model = " ".to_string();
+
+        let result = SystemSettings::update(&db, invalid_settings).await;
+        assert!(matches!(result, Err(AppError::Validation(_))));
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_update_normalizes_record_id() -> anyhow::Result<()> {
+        let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string())
+            .await
+            .with_context(|| "Failed to start in-memory surrealdb".to_string())?;
+        db.apply_migrations()
+            .await
+            .with_context(|| "Failed to apply migrations".to_string())?;
+
+        let mut settings = SystemSettings::get_current(&db)
+            .await
+            .with_context(|| "Failed to get system settings".to_string())?;
+        settings.id = "wrong-id".to_string();
+
+        let updated = SystemSettings::update(&db, settings)
+            .await
+            .with_context(|| "Failed to update settings".to_string())?;
+        assert_eq!(updated.id, SystemSettings::RECORD_ID);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_update_preserves_embedding_backend() -> anyhow::Result<()> {
+        use crate::utils::embedding::EmbeddingProvider;
+
+        let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string())
+            .await
+            .with_context(|| "Failed to start in-memory surrealdb".to_string())?;
+        db.apply_migrations()
+            .await
+            .with_context(|| "Failed to apply migrations".to_string())?;
+
+        let provider = EmbeddingProvider::new_hashed(384)
+            .with_context(|| "Failed to create hashed embedding provider".to_string())?;
+        SystemSettings::sync_from_embedding_provider(&db, &provider)
+            .await
+            .with_context(|| "Failed to sync embedding provider".to_string())?;
+
+        let synced = SystemSettings::get_current(&db)
+            .await
+            .with_context(|| "Failed to get synced settings".to_string())?;
+        assert_eq!(synced.embedding_backend, Some(EmbeddingBackend::Hashed));
+
+        let mut tampered = synced;
+        tampered.embedding_backend = Some(EmbeddingBackend::OpenAI);
+        let updated = SystemSettings::update(&db, tampered)
+            .await
+            .with_context(|| "Failed to update settings".to_string())?;
+
+        assert_eq!(updated.embedding_backend, Some(EmbeddingBackend::Hashed));
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_sync_from_embedding_provider_updates_mismatched_settings() -> anyhow::Result<()> {
+        use crate::utils::embedding::EmbeddingProvider;
+
+        let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string())
+            .await
+            .with_context(|| "Failed to start in-memory surrealdb".to_string())?;
+        db.apply_migrations()
+            .await
+            .with_context(|| "Failed to apply migrations".to_string())?;
+
+        let provider = EmbeddingProvider::new_hashed(384)
+            .with_context(|| "Failed to create hashed embedding provider".to_string())?;
+        let (settings, changed) = SystemSettings::sync_from_embedding_provider(&db, &provider)
+            .await
+            .with_context(|| "Failed to sync embedding provider".to_string())?;
+
+        assert!(changed);
+        assert_eq!(settings.embedding_backend, Some(EmbeddingBackend::Hashed));
+        assert_eq!(settings.embedding_dimensions, 384);
+
+        let persisted = SystemSettings::get_current(&db)
+            .await
+            .with_context(|| "Failed to reload synced settings".to_string())?;
+        assert_eq!(persisted.embedding_backend, Some(EmbeddingBackend::Hashed));
+        assert_eq!(persisted.embedding_dimensions, 384);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_sync_from_embedding_provider_is_noop_when_already_synced() -> anyhow::Result<()> {
+        use crate::utils::embedding::EmbeddingProvider;
+
+        let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string())
+            .await
+            .with_context(|| "Failed to start in-memory surrealdb".to_string())?;
+        db.apply_migrations()
+            .await
+            .with_context(|| "Failed to apply migrations".to_string())?;
+
+        let provider = EmbeddingProvider::new_hashed(384)
+            .with_context(|| "Failed to create hashed embedding provider".to_string())?;
+        SystemSettings::sync_from_embedding_provider(&db, &provider)
+            .await
+            .with_context(|| "Failed to initial sync".to_string())?;
+
+        let (_, changed) = SystemSettings::sync_from_embedding_provider(&db, &provider)
+            .await
+            .with_context(|| "Failed to repeat sync".to_string())?;
+        assert!(!changed);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_sync_rejects_provider_dimension_above_u32_max() -> anyhow::Result<()> {
+        use crate::utils::embedding::EmbeddingProvider;
+
+        let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string())
+            .await
+            .with_context(|| "Failed to start in-memory surrealdb".to_string())?;
+        db.apply_migrations()
+            .await
+            .with_context(|| "Failed to apply migrations".to_string())?;
+
+        let provider = EmbeddingProvider::new_hashed((u32::MAX as usize) + 1)
+            .with_context(|| "Failed to create oversized hashed provider".to_string())?;
+        let result = SystemSettings::sync_from_embedding_provider(&db, &provider).await;
+        assert!(matches!(result, Err(AppError::Validation(_))));
+        Ok(())
+    }
+
     #[tokio::test]
     async fn test_migration_after_changing_embedding_length() -> anyhow::Result<()> {
         let db = SurrealDbClient::memory("test", &Uuid::new_v4().to_string())
diff --git a/common/src/utils/embedding.rs b/common/src/utils/embedding.rs
index be7c3e7..f67ab4c 100644
--- a/common/src/utils/embedding.rs
+++ b/common/src/utils/embedding.rs
@@ -453,3 +453,72 @@ pub async fn generate_embedding_with_params(
 
     Ok(embedding)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{EmbeddingBackend, ParseEmbeddingBackendError};
+    use crate::storage::types::system_settings::SystemSettings;
+    use serde_json::json;
+
+    #[test]
+    fn embedding_backend_as_str_matches_serde_names() {
+        assert_eq!(EmbeddingBackend::OpenAI.as_str(), "openai");
+        assert_eq!(EmbeddingBackend::FastEmbed.as_str(), "fastembed");
+        assert_eq!(EmbeddingBackend::Hashed.as_str(), "hashed");
+
+        assert_eq!(
+            serde_json::to_string(&EmbeddingBackend::FastEmbed).expect("serialize"),
+            "\"fastembed\""
+        );
+    }
+
+    #[test]
+    fn embedding_backend_deserializes_lowercase_values() {
+        let openai: EmbeddingBackend = serde_json::from_str("\"openai\"").expect("openai");
+        let fastembed: EmbeddingBackend = serde_json::from_str("\"fastembed\"").expect("fastembed");
+        let hashed: EmbeddingBackend = serde_json::from_str("\"hashed\"").expect("hashed");
+
+        assert_eq!(openai, EmbeddingBackend::OpenAI);
+        assert_eq!(fastembed, EmbeddingBackend::FastEmbed);
+        assert_eq!(hashed, EmbeddingBackend::Hashed);
+    }
+
+    #[test]
+    fn embedding_backend_from_str_accepts_aliases() {
+        assert_eq!(
+            "fast-embed".parse::<EmbeddingBackend>().expect("fast-embed"),
+            EmbeddingBackend::FastEmbed
+        );
+        assert_eq!(
+            "FASTEMBED".parse::<EmbeddingBackend>().expect("FASTEMBED"),
+            EmbeddingBackend::FastEmbed
+        );
+        assert!(matches!(
+            "unknown-backend".parse::<EmbeddingBackend>(),
+            Err(ParseEmbeddingBackendError { .. })
+        ));
+    }
+
+    #[test]
+    fn system_settings_deserializes_embedding_backend_field() {
+        let value = json!({
+            "id": "current",
+            "registrations_enabled": true,
+            "require_email_verification": false,
+            "query_model": "gpt-4o-mini",
+            "processing_model": "gpt-4o-mini",
+            "embedding_model": "text-embedding-3-small",
+            "embedding_dimensions": 1536,
+            "embedding_backend": "hashed",
+            "query_system_prompt": "query",
+            "ingestion_system_prompt": "ingestion",
+            "image_processing_model": "gpt-4o-mini",
+            "image_processing_prompt": "image",
+            "voice_processing_model": "whisper-1",
+        });
+
+        let settings: SystemSettings =
+            serde_json::from_value(value).expect("deserialize system settings");
+        assert_eq!(settings.embedding_backend, Some(EmbeddingBackend::Hashed));
+    }
+}
diff --git a/ingestion-pipeline/src/pipeline/services.rs b/ingestion-pipeline/src/pipeline/services.rs
index c32623c..72f2698 100644
--- a/ingestion-pipeline/src/pipeline/services.rs
+++ b/ingestion-pipeline/src/pipeline/services.rs
@@ -348,3 +348,84 @@ fn truncate_for_embedding(text: &str, max_chars: usize) -> String {
     truncated.push('…');
     truncated
 }
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use anyhow::Context;
+    use async_openai::{config::OpenAIConfig, types::ChatCompletionRequestMessage, Client};
+    use common::{
+        storage::{
+            db::SurrealDbClient,
+            store::StorageManager,
+            types::system_settings::SystemSettingsPatch,
+        },
+        utils::{
+            config::{AppConfig, StorageKind},
+            embedding::EmbeddingProvider,
+        },
+    };
+    use uuid::Uuid;
+
+    use super::DefaultPipelineServices;
+
+    fn system_prompt_from_request(
+        request: &async_openai::types::CreateChatCompletionRequest,
+    ) -> String {
+        let ChatCompletionRequestMessage::System(system) = &request.messages[0] else {
+            panic!("expected first message to be system");
+        };
+        match &system.content {
+            async_openai::types::ChatCompletionRequestSystemMessageContent::Text(text) => {
+                text.clone()
+            }
+            other => panic!("unexpected system message content: {other:?}"),
+        }
+    }
+
+    #[tokio::test]
+    async fn prepare_llm_request_uses_ingestion_prompt_from_system_settings(
+    ) -> anyhow::Result<()> {
+        const SENTINEL: &str = "ingestion-prompt-sentinel-from-db";
+
+        let db = Arc::new(
+            SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string())
+                .await
+                .context("start in-memory db")?,
+        );
+        db.apply_migrations().await.context("apply migrations")?;
+        SystemSettingsPatch {
+            ingestion_system_prompt: Some(SENTINEL.to_string()),
+            ..Default::default()
+        }
+        .apply(&db)
+        .await
+        .context("patch ingestion prompt")?;
+
+        let config = AppConfig {
+            storage: StorageKind::Memory,
+            ..Default::default()
+        };
+        let storage = StorageManager::new(&config).await.context("storage manager")?;
+        let openai_client = Arc::new(Client::with_config(OpenAIConfig::default()));
+        let embedding_provider = Arc::new(EmbeddingProvider::new_hashed(384)?);
+
+        let services = DefaultPipelineServices::new(
+            db,
+            openai_client,
+            config,
+            None,
+            storage,
+            embedding_provider,
+        );
+
+        let request = services
+            .prepare_llm_request("notes", None, "hello world", &[])
+            .await
+            .context("prepare llm request")?;
+
+        assert_eq!(system_prompt_from_request(&request), SENTINEL);
+        Ok(())
+    }
+}