chore: lowercase all error messages and add # Errors doc sections

- Fix err-lowercase-msg: normalize all #[error(...)] display strings to
  lowercase (AppError, FileError, ApiErr) and update affected tests
- Fix err-doc-errors: add # Errors sections to 25+ fallible public
  functions across db.rs, store.rs, embedding.rs, indexes.rs,
  ingestion_task.rs, and ingest_limits.rs
This commit is contained in:
Per Stark
2026-05-27 14:59:48 +02:00
parent 4579725130
commit 224a7db451
9 changed files with 221 additions and 54 deletions
+8 -8
View File
@@ -9,19 +9,19 @@ use thiserror::Error;
#[derive(Error, Debug, Serialize, Clone)]
pub enum ApiErr {
#[error("Internal server error")]
#[error("internal server error")]
InternalError(String),
#[error("Validation error: {0}")]
#[error("validation error: {0}")]
ValidationError(String),
#[error("Not found: {0}")]
#[error("not found: {0}")]
NotFound(String),
#[error("Unauthorized: {0}")]
#[error("unauthorized: {0}")]
Unauthorized(String),
#[error("Payload too large: {0}")]
#[error("payload too large: {0}")]
PayloadTooLarge(String),
}
@@ -157,12 +157,12 @@ mod tests {
let error = ApiErr::ValidationError(message.to_string());
// Check that the error itself contains the message
assert_eq!(error.to_string(), format!("Validation error: {message}"));
assert_eq!(error.to_string(), format!("validation error: {message}"));
// For not found errors
let message = "user not found";
let error = ApiErr::NotFound(message.to_string());
assert_eq!(error.to_string(), format!("Not found: {message}"));
assert_eq!(error.to_string(), format!("not found: {message}"));
}
// Alternative approach for internal error test
@@ -175,7 +175,7 @@ mod tests {
let api_error = ApiErr::InternalError(sensitive_info.to_string());
// Check the error message is correctly set
assert_eq!(api_error.to_string(), "Internal server error");
assert_eq!(api_error.to_string(), "internal server error");
// Also verify correct status code
assert_status_code(api_error, StatusCode::INTERNAL_SERVER_ERROR);
+15 -15
View File
@@ -8,34 +8,34 @@ use crate::storage::types::file_info::FileError;
#[allow(clippy::module_name_repetitions)]
#[derive(Error, Debug)]
pub enum AppError {
#[error("Database error: {0}")]
#[error("database error: {0}")]
Database(#[from] surrealdb::Error),
#[error("OpenAI error: {0}")]
#[error("openai error: {0}")]
OpenAI(#[from] OpenAIError),
#[error("File error: {0}")]
#[error("file error: {0}")]
File(#[from] FileError),
#[error("Not found: {0}")]
#[error("not found: {0}")]
NotFound(String),
#[error("Validation error: {0}")]
#[error("validation error: {0}")]
Validation(String),
#[error("Authorization error: {0}")]
#[error("authorization error: {0}")]
Auth(String),
#[error("LLM parsing error: {0}")]
#[error("llm parsing error: {0}")]
LLMParsing(String),
#[error("Task join error: {0}")]
#[error("task join error: {0}")]
Join(#[from] JoinError),
#[error("Graph mapper error: {0}")]
#[error("graph mapper error: {0}")]
GraphMapper(String),
#[error("IO error: {0}")]
#[error("io error: {0}")]
Io(#[from] std::io::Error),
#[error("Reqwest error: {0}")]
#[error("reqwest error: {0}")]
Reqwest(#[from] reqwest::Error),
#[error("Storage error: {0}")]
#[error("storage error: {0}")]
Storage(#[from] object_store::Error),
#[error("Ingestion Processing error: {0}")]
#[error("ingestion processing error: {0}")]
Processing(String),
#[error("DOM smoothie error: {0}")]
#[error("dom smoothie error: {0}")]
DomSmoothie(#[from] dom_smoothie::ReadabilityError),
#[error("Internal service error: {0}")]
#[error("internal service error: {0}")]
InternalError(String),
}
+68 -23
View File
@@ -26,12 +26,20 @@ pub trait ProvidesDb {
}
impl SurrealDbClient {
/// # Initialize a new datbase client
/// Initialize a new database client.
///
/// # Arguments
///
/// # Returns
/// * `SurrealDbClient` initialized
/// * `address` — Database connection string (e.g. `ws://localhost:8000` or `mem://`).
/// * `username` — Root username for authentication.
/// * `password` — Root password for authentication.
/// * `namespace` — SurrealDB namespace to use.
/// * `database` — SurrealDB database to use.
///
/// # Errors
///
/// Returns `Err` if the connection, authentication, or namespace/database selection fails.
/// In-memory (`mem://`) connections skip authentication.
pub async fn new(
address: &str,
username: &str,
@@ -52,6 +60,19 @@ impl SurrealDbClient {
Ok(SurrealDbClient { client: db })
}
/// Initialize a new database client using namespace-level authentication.
///
/// # Arguments
///
/// * `address` — Database connection string.
/// * `namespace` — SurrealDB namespace to use (also used for auth).
/// * `username` — Namespace username for authentication.
/// * `password` — Namespace password for authentication.
/// * `database` — SurrealDB database to use.
///
/// # Errors
///
/// Returns `Err` if the connection, namespace authentication, or namespace/database selection fails.
pub async fn new_with_namespace_user(
address: &str,
namespace: &str,
@@ -70,6 +91,11 @@ impl SurrealDbClient {
Ok(SurrealDbClient { client: db })
}
/// Create an Axum session store backed by SurrealDB.
///
/// # Errors
///
/// Returns `SessionError` if the session store configuration or table creation fails.
pub async fn create_session_store(
&self,
) -> Result<SessionStore<SessionSurrealPool<Any>>, SessionError> {
@@ -88,6 +114,10 @@ impl SurrealDbClient {
/// This function should be called during application startup, after connecting to
/// the database and selecting the appropriate namespace and database, but before
/// the application starts performing operations that rely on the schema.
///
/// # Errors
///
/// Returns `AppError::InternalError` if the migration runner fails to apply any migration.
pub async fn apply_migrations(&self) -> Result<(), AppError> {
debug!("Applying migrations");
MigrationRunner::new(&self.client)
@@ -99,13 +129,15 @@ impl SurrealDbClient {
Ok(())
}
/// Operation to store a object in SurrealDB, requires the struct to implement StoredObject
/// Store an object in SurrealDB.
///
/// # Arguments
/// * `item` - The item to be stored
///
/// # Returns
/// * `Result` - Item or Error
/// * `item` — The item to store. Must implement `StoredObject`.
///
/// # Errors
///
/// Returns `Err` if the database create operation fails.
pub async fn store_item<T>(&self, item: T) -> Result<Option<T>, Error>
where
T: StoredObject + Send + Sync + 'static,
@@ -116,8 +148,13 @@ impl SurrealDbClient {
.await
}
/// Operation to upsert an object in SurrealDB, replacing any existing record
/// with the same ID. Useful for idempotent ingestion flows.
/// Upsert an object in SurrealDB, replacing any existing record with the same ID.
///
/// Useful for idempotent ingestion flows.
///
/// # Errors
///
/// Returns `Err` if the database upsert operation fails.
pub async fn upsert_item<T>(&self, item: T) -> Result<Option<T>, Error>
where
T: StoredObject + Send + Sync + 'static,
@@ -129,10 +166,11 @@ impl SurrealDbClient {
.await
}
/// Operation to retrieve all objects from a certain table, requires the struct to implement StoredObject
/// Retrieve all objects from a table.
///
/// # Returns
/// * `Result` - Vec<T> or Error
/// # Errors
///
/// Returns `Err` if the database select operation fails.
pub async fn get_all_stored_items<T>(&self) -> Result<Vec<T>, Error>
where
T: for<'de> StoredObject,
@@ -140,13 +178,16 @@ impl SurrealDbClient {
self.client.select(T::table_name()).await
}
/// Operation to retrieve a single object by its ID, requires the struct to implement StoredObject
/// Retrieve a single object by its ID.
///
/// # Arguments
/// * `id` - The ID of the item to retrieve
///
/// # Returns
/// * `Result<Option<T>, Error>` - The found item or Error
/// * `id` — The ID of the item to retrieve.
///
/// # Errors
///
/// Returns `Err` if the database select operation fails.
/// A missing record is not an error: `Ok(None)` is returned if no record with the given ID exists.
pub async fn get_item<T>(&self, id: &str) -> Result<Option<T>, Error>
where
T: for<'de> StoredObject,
@@ -154,13 +195,16 @@ impl SurrealDbClient {
self.client.select((T::table_name(), id)).await
}
/// Operation to delete a single object by its ID, requires the struct to implement StoredObject
/// Delete a single object by its ID.
///
/// # Arguments
/// * `id` - The ID of the item to delete
///
/// # Returns
/// * `Result<Option<T>, Error>` - The deleted item or Error
/// * `id` — The ID of the item to delete.
///
/// # Errors
///
/// Returns `Err` if the database delete operation fails.
/// A missing record is not an error: `Ok(None)` is returned if no record with the given ID exists.
pub async fn delete_item<T>(&self, id: &str) -> Result<Option<T>, Error>
where
T: for<'de> StoredObject,
@@ -168,10 +212,11 @@ impl SurrealDbClient {
self.client.delete((T::table_name(), id)).await
}
/// Operation to listen to a table for updates, requires the struct to implement StoredObject
/// Listen to a table for real-time updates via a live query stream.
///
/// # Returns
/// * `Result<Option<T>, Error>` - The deleted item or Error
/// # Errors
///
/// Returns `Err` if the database live query subscription fails.
pub async fn listen<T>(
&self,
) -> Result<impl Stream<Item = Result<Notification<T>, Error>>, Error>
+8
View File
@@ -159,6 +159,10 @@ impl FtsIndexSpec {
/// Build runtime Surreal indexes (FTS + HNSW) using concurrent creation with readiness polling.
/// Idempotent: safe to call multiple times and will overwrite HNSW definitions when the dimension changes.
///
/// # Errors
///
/// Returns `AppError::InternalError` if any index definition or polling step fails.
pub async fn ensure_runtime(
db: &SurrealDbClient,
embedding_dimension: usize,
@@ -169,6 +173,10 @@ pub async fn ensure_runtime(
}
/// Rebuild known FTS and HNSW indexes, skipping any that are not yet defined.
///
/// # Errors
///
/// Returns `AppError::InternalError` if any index rebuild operation fails.
pub async fn rebuild(db: &SurrealDbClient) -> Result<(), AppError> {
rebuild_inner(db)
.await
+29
View File
@@ -31,6 +31,11 @@ impl StorageManager {
///
/// This method validates the configuration and creates the appropriate
/// storage backend with proper initialization.
///
/// # Errors
///
/// Returns `Err` if the storage backend cannot be created or initialised
/// (e.g. missing S3 bucket, local filesystem permission error).
pub async fn new(cfg: &AppConfig) -> object_store::Result<Self> {
let backend_kind = cfg.storage;
let (store, local_base) = create_storage_backend(cfg).await?;
@@ -90,6 +95,10 @@ impl StorageManager {
///
/// This operation persists data using the underlying storage backend.
/// For memory backends, data persists for the lifetime of the StorageManager.
///
/// # Errors
///
/// Returns `Err` if the underlying storage backend fails to persist the data.
pub async fn put(&self, location: &str, data: Bytes) -> object_store::Result<()> {
let path = ObjPath::from(location);
let payload = object_store::PutPayload::from_bytes(data);
@@ -99,6 +108,10 @@ impl StorageManager {
/// Retrieve bytes from the specified location.
///
/// Returns the full contents buffered in memory.
///
/// # Errors
///
/// Returns `Err` if the location does not exist or the underlying backend fails.
pub async fn get(&self, location: &str) -> object_store::Result<Bytes> {
let path = ObjPath::from(location);
let result = self.store.get(&path).await?;
@@ -108,6 +121,10 @@ impl StorageManager {
/// Get a streaming handle for large objects.
///
/// Returns a fallible stream of Bytes chunks suitable for large file processing.
///
/// # Errors
///
/// Returns `Err` if the location does not exist or the underlying backend fails.
pub async fn get_stream(
&self,
location: &str,
@@ -120,6 +137,10 @@ impl StorageManager {
/// Delete all objects below the specified prefix.
///
/// For local filesystem backends, this also attempts to clean up empty directories.
///
/// # Errors
///
/// Returns `Err` if the underlying backend fails during deletion.
pub async fn delete_prefix(&self, prefix: &str) -> object_store::Result<()> {
let prefix_path = ObjPath::from(prefix);
let locations = self
@@ -141,6 +162,10 @@ impl StorageManager {
}
/// List all objects below the specified prefix.
///
/// # Errors
///
/// Returns `Err` if the underlying backend fails to list objects.
pub async fn list(
&self,
prefix: Option<&str>,
@@ -150,6 +175,10 @@ impl StorageManager {
}
/// Check if an object exists at the specified location.
///
/// # Errors
///
/// Returns `Err` if the underlying backend returns a non-NotFound error.
pub async fn exists(&self, location: &str) -> object_store::Result<bool> {
let path = ObjPath::from(location);
self.store
+7 -7
View File
@@ -21,25 +21,25 @@ use crate::{
#[derive(Error, Debug)]
pub enum FileError {
#[error("File not found for UUID: {0}")]
#[error("file not found for uuid: {0}")]
FileNotFound(String),
#[error("IO error occurred: {0}")]
#[error("io error occurred: {0}")]
Io(#[from] std::io::Error),
#[error("Duplicate file detected with SHA256: {0}")]
#[error("duplicate file detected with sha256: {0}")]
DuplicateFile(String),
#[error("SurrealDB error: {0}")]
#[error("surrealdb error: {0}")]
SurrealError(#[from] surrealdb::Error),
#[error("Failed to persist file: {0}")]
#[error("failed to persist file: {0}")]
PersistError(#[from] tempfile::PersistError),
#[error("File name missing in metadata")]
#[error("file name missing in metadata")]
MissingFileName,
#[error("Object store error: {0}")]
#[error("object store error: {0}")]
ObjectStore(#[from] ObjectStoreError),
}
@@ -207,6 +207,11 @@ impl IngestionTask {
Duration::from_secs(u64::try_from(self.lease_duration_secs.max(0)).unwrap_or(0))
}
/// Create a new task and immediately persist it to the database.
///
/// # Errors
///
/// Returns `AppError::Database` if the store operation fails.
pub async fn create_and_add_to_db(
content: IngestionPayload,
user_id: String,
@@ -217,6 +222,14 @@ impl IngestionTask {
Ok(task)
}
/// Claim the next ready task for processing.
///
/// Atomically reserves a task by transitioning it from a candidate state to `Reserved`.
/// Returns `Ok(None)` if no task is ready to claim.
///
/// # Errors
///
/// Returns `AppError::Database` if the update query fails.
pub async fn claim_next_ready(
db: &SurrealDbClient,
worker_id: &str,
@@ -291,6 +304,12 @@ impl IngestionTask {
Ok(task)
}
/// Transition this task from `Reserved` to `Processing`.
///
/// # Errors
///
/// Returns `AppError::Validation` if the task is not in `Reserved` state
/// or belongs to a different worker. Returns `AppError::Database` on DB failure.
pub async fn mark_processing(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
const START_PROCESSING_QUERY: &str = r#"
UPDATE type::thing($table, $id)
@@ -317,6 +336,12 @@ impl IngestionTask {
updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::StartProcessing))
}
/// Transition this task from `Processing` to `Succeeded`.
///
/// # Errors
///
/// Returns `AppError::Validation` if the task is not in `Processing` state
/// or belongs to a different worker. Returns `AppError::Database` on DB failure.
pub async fn mark_succeeded(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
const COMPLETE_QUERY: &str = r#"
UPDATE type::thing($table, $id)
@@ -348,6 +373,14 @@ impl IngestionTask {
updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::Succeed))
}
/// Transition this task from `Processing` to `Failed`.
///
/// The task will be rescheduled for retry after `retry_delay`.
///
/// # Errors
///
/// Returns `AppError::Validation` if the task is not in `Processing` state
/// or belongs to a different worker. Returns `AppError::Database` on DB failure.
pub async fn mark_failed(
&self,
error: TaskErrorInfo,
@@ -394,6 +427,12 @@ impl IngestionTask {
updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::Fail))
}
/// Transition this task from `Failed` to `DeadLetter`.
///
/// # Errors
///
/// Returns `AppError::Validation` if the task is not in `Failed` state.
/// Returns `AppError::Database` on DB failure.
pub async fn mark_dead_letter(
&self,
error: TaskErrorInfo,
@@ -430,6 +469,12 @@ impl IngestionTask {
updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::DeadLetter))
}
/// Transition this task to `Cancelled` from any non-terminal state.
///
/// # Errors
///
/// Returns `AppError::Validation` if the task is in a terminal state.
/// Returns `AppError::Database` on DB failure.
pub async fn mark_cancelled(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
const CANCEL_QUERY: &str = r#"
UPDATE type::thing($table, $id)
@@ -463,6 +508,12 @@ impl IngestionTask {
updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::Cancel))
}
/// Release a reserved task back to `Pending` state.
///
/// # Errors
///
/// Returns `AppError::Validation` if the task is not in `Reserved` state.
/// Returns `AppError::Database` on DB failure.
pub async fn release(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
const RELEASE_QUERY: &str = r#"
UPDATE type::thing($table, $id)
@@ -489,6 +540,11 @@ impl IngestionTask {
updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::Release))
}
/// Retrieve all non-terminal tasks across active states.
///
/// # Errors
///
/// Returns `AppError::Database` if the query fails.
pub async fn get_unfinished_tasks(
db: &SurrealDbClient,
) -> Result<Vec<IngestionTask>, AppError> {
+21 -1
View File
@@ -106,6 +106,12 @@ impl EmbeddingProvider {
}
}
/// Generate an embedding vector for the given text.
///
/// # Errors
///
/// Returns `Err` if the backend API call fails, FastEmbed initialisation fails,
/// or the backend returns no embedding data.
pub async fn embed(&self, text: &str) -> Result<Vec<f32>> {
match &self.inner {
EmbeddingInner::Hashed { dimension } => Ok(hashed_embedding(text, *dimension)),
@@ -144,6 +150,12 @@ impl EmbeddingProvider {
}
}
/// Generate embedding vectors for a batch of texts.
///
/// # Errors
///
/// Returns `Err` if the backend API call fails or returns no embedding data.
/// An empty `texts` input is not an error: it yields `Ok` with an empty `Vec`.
pub async fn embed_batch(&self, texts: Vec<String>) -> Result<Vec<Vec<f32>>> {
match &self.inner {
EmbeddingInner::Hashed { dimension } => Ok(texts
@@ -309,7 +321,11 @@ fn bucket(token: &str, dimension: usize) -> usize {
usize::try_from(hasher.finish()).unwrap_or_default() % safe_dimension
}
// Backward compatibility function
/// Generate an embedding using the given provider.
///
/// # Errors
///
/// Returns `AppError::InternalError` if the provider's embed call fails.
pub async fn generate_embedding_with_provider(
provider: &EmbeddingProvider,
input: &str,
@@ -372,6 +388,10 @@ pub async fn generate_embedding(
///
/// This is used for the re-embedding process where the model and dimensions
/// are known ahead of time and shouldn't be repeatedly fetched from settings.
///
/// # Errors
///
/// Returns `AppError` if the OpenAI API request fails or returns no embedding data.
pub async fn generate_embedding_with_params(
client: &async_openai::Client<async_openai::config::OpenAIConfig>,
input: &str,
+9
View File
@@ -6,6 +6,15 @@ pub enum IngestValidationError {
BadRequest(String),
}
/// Validates ingestion input against configured limits.
///
/// Checks file count, content size, context size, and category length.
///
/// # Errors
///
/// Returns `IngestValidationError::BadRequest` if the file count exceeds the maximum.
/// Returns `IngestValidationError::PayloadTooLarge` if content, context, or
/// category exceed their configured byte limits.
pub fn validate_ingest_input(
config: &AppConfig,
content: Option<&str>,