From 31d585b59f14244e23b070706df4733b5c09820a Mon Sep 17 00:00:00 2001 From: Per Stark Date: Wed, 27 May 2026 13:33:02 +0200 Subject: [PATCH] chore: removed anyhow from apperror for improved error handling --- common/src/error.rs | 6 ++-- common/src/storage/types/file_info.rs | 6 ++-- common/src/utils/embedding.rs | 2 +- .../src/pipeline/enrichment_result.rs | 4 +-- ingestion-pipeline/src/pipeline/services.rs | 4 +-- .../src/utils/file_text_extraction.rs | 3 +- .../src/utils/url_text_retrieval.rs | 36 ++++++++++++------- 7 files changed, 36 insertions(+), 25 deletions(-) diff --git a/common/src/error.rs b/common/src/error.rs index 873921a..2a69a6d 100644 --- a/common/src/error.rs +++ b/common/src/error.rs @@ -26,12 +26,12 @@ pub enum AppError { Join(#[from] JoinError), #[error("Graph mapper error: {0}")] GraphMapper(String), - #[error("IoError: {0}")] + #[error("IO error: {0}")] Io(#[from] std::io::Error), #[error("Reqwest error: {0}")] Reqwest(#[from] reqwest::Error), - #[error("Anyhow error: {0}")] - Anyhow(#[from] anyhow::Error), + #[error("Storage error: {0}")] + Storage(#[from] object_store::Error), #[error("Ingestion Processing error: {0}")] Processing(String), #[error("DOM smoothie error: {0}")] diff --git a/common/src/storage/types/file_info.rs b/common/src/storage/types/file_info.rs index 12b0e73..508555e 100644 --- a/common/src/storage/types/file_info.rs +++ b/common/src/storage/types/file_info.rs @@ -253,11 +253,11 @@ impl FileInfo { // Remove the object's parent prefix in the object store let (parent_prefix, _file_name) = store::split_object_path(&file_info.path) - .map_err(|e| AppError::from(anyhow::anyhow!(e)))?; + .map_err(|e| AppError::InternalError(e.to_string()))?; storage .delete_prefix(&parent_prefix) .await - .map_err(|e| AppError::from(anyhow::anyhow!(e)))?; + .map_err(AppError::Storage)?; info!( "Removed object prefix {} and its contents via StorageManager", parent_prefix @@ -283,7 +283,7 @@ impl FileInfo { storage .get(&self.path) .await - .map_err(|e: object_store::Error| AppError::from(anyhow::anyhow!(e))) + .map_err(AppError::Storage) } /// Persist file to storage using StorageManager. diff --git a/common/src/utils/embedding.rs b/common/src/utils/embedding.rs index ae4c33a..143800f 100644 --- a/common/src/utils/embedding.rs +++ b/common/src/utils/embedding.rs @@ -311,7 +311,7 @@ pub async fn generate_embedding_with_provider( provider: &EmbeddingProvider, input: &str, ) -> Result, AppError> { - provider.embed(input).await.map_err(AppError::from) + provider.embed(input).await.map_err(|e| AppError::InternalError(e.to_string())) } /// Generates an embedding vector for the given input text using `OpenAI`'s embedding model. diff --git a/ingestion-pipeline/src/pipeline/enrichment_result.rs b/ingestion-pipeline/src/pipeline/enrichment_result.rs index ed1f6c9..a2003b2 100644 --- a/ingestion-pipeline/src/pipeline/enrichment_result.rs +++ b/ingestion-pipeline/src/pipeline/enrichment_result.rs @@ -4,7 +4,7 @@ use chrono::Utc; use futures::stream::{self, StreamExt, TryStreamExt}; use serde::{Deserialize, Serialize}; -use anyhow::Context; + use common::{ error::AppError, storage::{ @@ -161,7 +161,7 @@ async fn create_single_entity( provider .embed(&embedding_input) .await - .context("generating FastEmbed embedding for entity")? + .map_err(|e| AppError::InternalError(format!("FastEmbed embedding for entity failed: {e}")))? } else { generate_embedding(openai_client, &embedding_input, db_client).await? }; diff --git a/ingestion-pipeline/src/pipeline/services.rs b/ingestion-pipeline/src/pipeline/services.rs index ccd036e..1003e0f 100644 --- a/ingestion-pipeline/src/pipeline/services.rs +++ b/ingestion-pipeline/src/pipeline/services.rs @@ -3,7 +3,7 @@ use std::{ sync::{Arc, OnceLock}, }; -use anyhow::Context; + use async_openai::types::{ ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage, CreateChatCompletionRequest, CreateChatCompletionRequestArgs, ResponseFormat, @@ -269,7 +269,7 @@ impl PipelineServices for DefaultPipelineServices { .embedding_provider .embed(&chunk_text) .await - .context("generating FastEmbed embedding for chunk")?; + .map_err(|e| AppError::InternalError(format!("FastEmbed embedding for chunk failed: {e}")))?; let chunk_struct = TextChunk::new( content.get_id().to_string(), chunk_text, diff --git a/ingestion-pipeline/src/utils/file_text_extraction.rs b/ingestion-pipeline/src/utils/file_text_extraction.rs index 74bf6eb..9cc95bf 100644 --- a/ingestion-pipeline/src/utils/file_text_extraction.rs +++ b/ingestion-pipeline/src/utils/file_text_extraction.rs @@ -1,4 +1,3 @@ -use anyhow::anyhow; use common::{ error::AppError, storage::{db::SurrealDbClient, store::StorageManager, types::file_info::FileInfo}, @@ -78,7 +77,7 @@ pub async fn extract_text_from_file( let file_bytes = storage .get(&file_info.path) .await - .map_err(|e| AppError::from(anyhow!(e)))?; + .map_err(AppError::Storage)?; let local_path = resolve_existing_local_path(storage, &file_info.path).await; match file_info.mime_type.as_str() { diff --git a/ingestion-pipeline/src/utils/url_text_retrieval.rs b/ingestion-pipeline/src/utils/url_text_retrieval.rs index a6ea67e..e399b90 100644 --- a/ingestion-pipeline/src/utils/url_text_retrieval.rs +++ b/ingestion-pipeline/src/utils/url_text_retrieval.rs @@ -30,24 +30,36 @@ pub async fn extract_text_from_url( .sandbox(false) .build() .map_err(|e| AppError::InternalError(e.to_string()))?; - Browser::new(options)? + Browser::new(options) + .map_err(|e| AppError::InternalError(e.to_string()))? } #[cfg(not(feature = "docker"))] { - Browser::default()? + Browser::default() + .map_err(|e| AppError::InternalError(e.to_string()))? } }; - let tab = browser.new_tab()?; - let page = tab.navigate_to(url)?; - let loaded_page = page.wait_until_navigated()?; - let raw_content = loaded_page.get_content()?; - let screenshot = loaded_page.capture_screenshot( - headless_chrome::protocol::cdp::Page::CaptureScreenshotFormatOption::Jpeg, - None, - None, - true, - )?; + let tab = browser + .new_tab() + .map_err(|e| AppError::InternalError(e.to_string()))?; + let page = tab + .navigate_to(url) + .map_err(|e| AppError::InternalError(e.to_string()))?; + let loaded_page = page + .wait_until_navigated() + .map_err(|e| AppError::InternalError(e.to_string()))?; + let raw_content = loaded_page + .get_content() + .map_err(|e| AppError::InternalError(e.to_string()))?; + let screenshot = loaded_page + .capture_screenshot( + headless_chrome::protocol::cdp::Page::CaptureScreenshotFormatOption::Jpeg, + None, + None, + true, + ) + .map_err(|e| AppError::InternalError(e.to_string()))?; let mut tmp_file = NamedTempFile::new()?; let temp_path_str = tmp_file.path().display().to_string();