clippy: adhere to pedantic clippy, uniform test error handling

This commit is contained in:
Per Stark
2026-05-26 11:43:45 +02:00
parent 6a5d631287
commit 000852c94c
68 changed files with 2468 additions and 2547 deletions
+1 -1
View File
@@ -180,7 +180,7 @@ impl PipelineServices for DefaultPipelineServices {
);
let rerank_lease = match &self.reranker_pool {
Some(pool) => Some(pool.checkout().await),
Some(pool) => pool.checkout().await,
None => None,
};
@@ -4,7 +4,7 @@ use common::{
error::AppError,
storage::{
db::SurrealDbClient,
indexes::rebuild_indexes,
indexes::rebuild,
types::{
ingestion_payload::IngestionPayload, knowledge_entity::KnowledgeEntity,
knowledge_relationship::KnowledgeRelationship, text_chunk::TextChunk,
@@ -191,7 +191,7 @@ pub async fn persist(
ctx.db.store_item(text_content).await?;
debug!("stored item");
rebuild_indexes(ctx.db).await?;
rebuild(ctx.db).await?;
debug!(
task_id = %ctx.task_id,
@@ -301,8 +301,8 @@ async fn store_chunk_batch(
for embedded in batch {
TextChunk::store_with_embedding(
embedded.chunk.to_owned(),
embedded.embedding.to_owned(),
embedded.chunk.clone(),
embedded.embedding.clone(),
db,
)
.await?;
+62 -72
View File
@@ -1,5 +1,6 @@
use std::sync::Arc;
use anyhow::{self, Context};
use crate::pipeline::context::{EmbeddedKnowledgeEntity, EmbeddedTextChunk};
use async_trait::async_trait;
use chrono::{Duration as ChronoDuration, Utc};
@@ -265,16 +266,12 @@ impl PipelineServices for ValidationServices {
}
}
async fn setup_db() -> SurrealDbClient {
async fn setup_db() -> anyhow::Result<SurrealDbClient> {
let namespace = "pipeline_test";
let database = Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, &database)
.await
.expect("Failed to create in-memory SurrealDB");
db.apply_migrations()
.await
.expect("Failed to apply migrations");
db
let db = SurrealDbClient::memory(namespace, &database).await?;
db.apply_migrations().await?;
Ok(db)
}
fn pipeline_config() -> IngestionConfig {
@@ -295,26 +292,28 @@ async fn reserve_task(
worker_id: &str,
payload: IngestionPayload,
user_id: &str,
) -> IngestionTask {
let task = IngestionTask::create_and_add_to_db(payload, user_id.into(), db)
.await
.expect("task created");
) -> anyhow::Result<IngestionTask> {
let task = IngestionTask::create_and_add_to_db(payload, user_id.into(), db).await?;
let lease = task.lease_duration();
IngestionTask::claim_next_ready(db, worker_id, Utc::now(), lease)
.await
.expect("claim succeeds")
.expect("task claimed")
let claimed = IngestionTask::claim_next_ready(db, worker_id, Utc::now(), lease)
.await?
.context("task claimed")?;
Ok(claimed)
}
#[tokio::test]
async fn ingestion_pipeline_happy_path_persists_entities() {
let db = setup_db().await;
async fn ingestion_pipeline_happy_path_persists_entities() -> anyhow::Result<()>
{
let db = setup_db().await?;
let worker_id = "worker-happy";
let user_id = "user-123";
let services = Arc::new(MockServices::new(user_id));
let pipeline =
IngestionPipeline::with_services(Arc::new(db.clone()), pipeline_config(), services.clone())
.expect("pipeline");
let services_clone: Arc<dyn PipelineServices> = Arc::<MockServices>::clone(&services);
let pipeline = IngestionPipeline::with_services(
Arc::new(db.clone()),
pipeline_config(),
services_clone,
)?;
let task = reserve_task(
&db,
@@ -327,30 +326,22 @@ async fn ingestion_pipeline_happy_path_persists_entities() {
},
user_id,
)
.await;
.await?;
pipeline
.process_task(task.clone())
.await
.expect("pipeline succeeds");
pipeline.process_task(task.clone()).await?;
let stored_task: IngestionTask = db
.get_item(&task.id)
.await
.expect("retrieve task")
.expect("task present");
.await?
.context("task present")?;
assert_eq!(stored_task.state, TaskState::Succeeded);
let stored_entities: Vec<KnowledgeEntity> = db
.get_all_stored_items::<KnowledgeEntity>()
.await
.expect("entities stored");
.await?;
assert!(!stored_entities.is_empty(), "entities should be stored");
let stored_chunks: Vec<TextChunk> = db
.get_all_stored_items::<TextChunk>()
.await
.expect("chunks stored");
let stored_chunks: Vec<TextChunk> = db.get_all_stored_items::<TextChunk>().await?;
assert!(
!stored_chunks.is_empty(),
"chunks should be stored for ingestion text"
@@ -362,22 +353,29 @@ async fn ingestion_pipeline_happy_path_persists_entities() {
"expected at least one chunk embedding call"
);
assert_eq!(
&call_log[0..4],
["prepare", "retrieve", "enrich", "convert"]
call_log.get(0..4),
Some(&["prepare", "retrieve", "enrich", "convert"][..])
);
assert!(call_log[4..].iter().all(|entry| *entry == "chunk"));
assert!(
call_log.get(4..).is_some_and(|tail| tail.iter().all(|entry| *entry == "chunk"))
);
Ok(())
}
#[tokio::test]
async fn ingestion_pipeline_chunk_only_skips_analysis() {
let db = setup_db().await;
async fn ingestion_pipeline_chunk_only_skips_analysis() -> anyhow::Result<()> {
let db = setup_db().await?;
let worker_id = "worker-chunk-only";
let user_id = "user-999";
let services = Arc::new(MockServices::new(user_id));
let services_clone: Arc<dyn PipelineServices> = Arc::<MockServices>::clone(&services);
let mut config = pipeline_config();
config.chunk_only = true;
let pipeline = IngestionPipeline::with_services(Arc::new(db.clone()), config, services.clone())
.expect("pipeline");
let pipeline = IngestionPipeline::with_services(
Arc::new(db.clone()),
config,
services_clone,
)?;
let task = reserve_task(
&db,
@@ -390,17 +388,13 @@ async fn ingestion_pipeline_chunk_only_skips_analysis() {
},
user_id,
)
.await;
.await?;
pipeline
.process_task(task.clone())
.await
.expect("pipeline succeeds");
pipeline.process_task(task.clone()).await?;
let stored_entities: Vec<KnowledgeEntity> = db
.get_all_stored_items::<KnowledgeEntity>()
.await
.expect("entities stored");
.await?;
assert!(
stored_entities.is_empty(),
"chunk-only ingestion should not persist entities"
@@ -408,8 +402,7 @@ async fn ingestion_pipeline_chunk_only_skips_analysis() {
let relationship_count: Option<i64> = db
.client
.query("SELECT count() as count FROM relates_to;")
.await
.expect("query relationships")
.await?
.take::<Option<i64>>(0)
.unwrap_or_default();
assert_eq!(
@@ -417,10 +410,7 @@ async fn ingestion_pipeline_chunk_only_skips_analysis() {
0,
"chunk-only ingestion should not persist relationships"
);
let stored_chunks: Vec<TextChunk> = db
.get_all_stored_items::<TextChunk>()
.await
.expect("chunks stored");
let stored_chunks: Vec<TextChunk> = db.get_all_stored_items::<TextChunk>().await?;
assert!(
!stored_chunks.is_empty(),
"chunk-only ingestion should still persist chunks"
@@ -428,19 +418,19 @@ async fn ingestion_pipeline_chunk_only_skips_analysis() {
let call_log = services.calls.lock().await.clone();
assert_eq!(call_log, vec!["prepare", "chunk"]);
Ok(())
}
#[tokio::test]
async fn ingestion_pipeline_failure_marks_retry() {
let db = setup_db().await;
async fn ingestion_pipeline_failure_marks_retry() -> anyhow::Result<()> {
let db = setup_db().await?;
let worker_id = "worker-fail";
let user_id = "user-456";
let services = Arc::new(FailingServices {
inner: MockServices::new(user_id),
});
let pipeline =
IngestionPipeline::with_services(Arc::new(db.clone()), pipeline_config(), services)
.expect("pipeline");
IngestionPipeline::with_services(Arc::new(db.clone()), pipeline_config(), services)?;
let task = reserve_task(
&db,
@@ -453,7 +443,7 @@ async fn ingestion_pipeline_failure_marks_retry() {
},
user_id,
)
.await;
.await?;
let result = pipeline.process_task(task.clone()).await;
assert!(
@@ -463,38 +453,38 @@ async fn ingestion_pipeline_failure_marks_retry() {
let stored_task: IngestionTask = db
.get_item(&task.id)
.await
.expect("retrieve task")
.expect("task present");
.await?
.context("task present")?;
assert_eq!(stored_task.state, TaskState::Failed);
assert!(
stored_task.scheduled_at > Utc::now() - ChronoDuration::seconds(5),
"failed task should schedule retry in the future"
);
Ok(())
}
#[tokio::test]
async fn ingestion_pipeline_validation_failure_dead_letters_task() {
let db = setup_db().await;
async fn ingestion_pipeline_validation_failure_dead_letters_task(
) -> anyhow::Result<()> {
let db = setup_db().await?;
let worker_id = "worker-validation";
let user_id = "user-789";
let services = Arc::new(ValidationServices);
let pipeline =
IngestionPipeline::with_services(Arc::new(db.clone()), pipeline_config(), services)
.expect("pipeline");
IngestionPipeline::with_services(Arc::new(db.clone()), pipeline_config(), services)?;
let task = reserve_task(
&db,
worker_id,
IngestionPayload::Text {
text: "irrelevant".into(),
context: "".into(),
context: String::new(),
category: "notes".into(),
user_id: user_id.into(),
},
user_id,
)
.await;
.await?;
let result = pipeline.process_task(task.clone()).await;
assert!(
@@ -504,8 +494,8 @@ async fn ingestion_pipeline_validation_failure_dead_letters_task() {
let stored_task: IngestionTask = db
.get_item(&task.id)
.await
.expect("retrieve task")
.expect("task present");
.await?
.context("task present")?;
assert_eq!(stored_task.state, TaskState::DeadLetter);
Ok(())
}
@@ -155,21 +155,20 @@ mod tests {
};
#[tokio::test]
async fn extracts_text_using_memory_storage_backend() {
let mut config = AppConfig::default();
config.storage = StorageKind::Memory;
async fn extracts_text_using_memory_storage_backend() -> anyhow::Result<()> {
let config = AppConfig {
storage: StorageKind::Memory,
..Default::default()
};
let storage = StorageManager::new(&config)
.await
.expect("create storage manager");
let storage = StorageManager::new(&config).await?;
let location = "user/test/file.txt";
let contents = b"hello from memory storage";
storage
.put(location, Bytes::from(contents.as_slice().to_vec()))
.await
.expect("write object");
.await?;
let now = Utc::now();
let file_info = FileInfo {
@@ -185,16 +184,14 @@ mod tests {
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("create surreal memory");
let db = SurrealDbClient::memory(namespace, database).await?;
let openai_client = Client::with_config(OpenAIConfig::default());
let text = extract_text_from_file(&file_info, &db, &openai_client, &config, &storage)
.await
.expect("extract text");
.await?;
assert_eq!(text, String::from_utf8_lossy(contents));
Ok(())
}
}
@@ -715,6 +715,7 @@ const fn prompt_for_attempt(attempt: usize, base_prompt: &str) -> &str {
#[cfg(test)]
mod tests {
use super::*;
use anyhow::{self};
#[test]
fn test_looks_good_enough_short_text() {
@@ -737,15 +738,16 @@ mod tests {
}
#[test]
fn test_debug_dump_directory_env_var() {
fn test_debug_dump_directory_env_var() -> anyhow::Result<()> {
std::env::remove_var(DEBUG_IMAGE_ENV_VAR);
assert!(debug_dump_directory().is_none());
std::env::set_var(DEBUG_IMAGE_ENV_VAR, "/tmp/minne_pdf_debug");
let dir = debug_dump_directory().expect("expected debug directory");
let dir = debug_dump_directory().ok_or_else(|| anyhow::anyhow!("expected debug directory"))?;
assert_eq!(dir, PathBuf::from("/tmp/minne_pdf_debug"));
std::env::remove_var(DEBUG_IMAGE_ENV_VAR);
Ok(())
}
#[test]
@@ -142,29 +142,34 @@ fn ensure_ingestion_url_allowed(url: &url::Url) -> Result<String, AppError> {
#[cfg(test)]
mod tests {
use super::*;
use anyhow::{self};
#[test]
fn rejects_unsupported_scheme() {
let url = url::Url::parse("ftp://example.com").expect("url");
fn rejects_unsupported_scheme() -> anyhow::Result<()> {
let url = url::Url::parse("ftp://example.com")?;
assert!(ensure_ingestion_url_allowed(&url).is_err());
Ok(())
}
#[test]
fn rejects_localhost() {
let url = url::Url::parse("http://localhost/resource").expect("url");
fn rejects_localhost() -> anyhow::Result<()> {
let url = url::Url::parse("http://localhost/resource")?;
assert!(ensure_ingestion_url_allowed(&url).is_err());
Ok(())
}
#[test]
fn rejects_private_ipv4() {
let url = url::Url::parse("http://192.168.1.10/index.html").expect("url");
fn rejects_private_ipv4() -> anyhow::Result<()> {
let url = url::Url::parse("http://192.168.1.10/index.html")?;
assert!(ensure_ingestion_url_allowed(&url).is_err());
Ok(())
}
#[test]
fn allows_public_domain_and_sanitizes() {
let url = url::Url::parse("https://sub.example.com/path").expect("url");
let sanitized = ensure_ingestion_url_allowed(&url).expect("allowed");
fn allows_public_domain_and_sanitizes() -> anyhow::Result<()> {
let url = url::Url::parse("https://sub.example.com/path")?;
let sanitized = ensure_ingestion_url_allowed(&url)?;
assert_eq!(sanitized, "sub_example_com");
Ok(())
}
}