diff --git a/common/src/storage/db.rs b/common/src/storage/db.rs index 2b6ec41..c24eb60 100644 --- a/common/src/storage/db.rs +++ b/common/src/storage/db.rs @@ -143,7 +143,7 @@ impl SurrealDbClient { T: StoredObject + Send + Sync + 'static, { self.client - .create((T::table_name(), item.get_id())) + .create((T::table_name(), item.id())) .content(item) .await } @@ -159,7 +159,7 @@ impl SurrealDbClient { where T: StoredObject + Send + Sync + 'static, { - let id = item.get_id().to_string(); + let id = item.id().to_string(); self.client .upsert((T::table_name(), id)) .content(item) diff --git a/common/src/storage/indexes.rs b/common/src/storage/indexes.rs index 5de4c96..7e04f48 100644 --- a/common/src/storage/indexes.rs +++ b/common/src/storage/indexes.rs @@ -581,14 +581,22 @@ async fn poll_index_build_status( Ok(()) } +/// Snapshot of an index build progress as reported by SurrealDB's `INFO FOR INDEX`. #[derive(Debug, PartialEq)] struct IndexBuildSnapshot { + /// Current build status string (e.g., `"indexing"`, `"ready"`, `"error"`). status: String, + /// Number of rows present when the build started. initial: u64, + /// Number of rows still pending processing. pending: u64, + /// Number of rows updated since the build started. updated: u64, + /// Total rows processed so far (`initial + updated`). processed: u64, + /// Total rows expected (from `SELECT count()` before the build), if available. total_rows: Option, + /// Progress as a percentage of `processed / total_rows`, if `total_rows` is known. progress_pct: Option, } diff --git a/common/src/storage/types/analytics.rs b/common/src/storage/types/analytics.rs index 29da3ab..71e1e67 100644 --- a/common/src/storage/types/analytics.rs +++ b/common/src/storage/types/analytics.rs @@ -16,7 +16,7 @@ impl StoredObject for Analytics { "analytics" } - fn get_id(&self) -> &str { + fn id(&self) -> &str { &self.id } } diff --git a/common/src/storage/types/file_info.rs b/common/src/storage/types/file_info.rs index 041f63d..bd1ea95 100644 --- a/common/src/storage/types/file_info.rs +++ b/common/src/storage/types/file_info.rs @@ -19,26 +19,34 @@ use crate::{ stored_object, }; +/// Errors that can occur during file storage operations. #[derive(Error, Debug)] pub enum FileError { + /// No file record found with the given UUID. #[error("file not found for uuid: {0}")] FileNotFound(String), + /// Underlying I/O operation failed. #[error("io error occurred: {0}")] Io(#[from] std::io::Error), + /// A file with the same SHA-256 hash already exists. #[error("duplicate file detected with sha256: {0}")] DuplicateFile(String), + /// Database operation on the file record failed. #[error("surrealdb error: {0}")] SurrealError(#[from] surrealdb::Error), + /// Failed to persist the temporary file to its final location. #[error("failed to persist file: {0}")] PersistError(#[from] tempfile::PersistError), + /// Upload metadata did not include a file name. #[error("file name missing in metadata")] MissingFileName, + /// The underlying object store operation failed. #[error("object store error: {0}")] ObjectStore(#[from] ObjectStoreError), } diff --git a/common/src/storage/types/ingestion_task.rs b/common/src/storage/types/ingestion_task.rs index ad3f48a..aec5ebf 100644 --- a/common/src/storage/types/ingestion_task.rs +++ b/common/src/storage/types/ingestion_task.rs @@ -68,12 +68,16 @@ impl TaskState { } } +/// Information about an error that occurred during task processing. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq, Default)] pub struct TaskErrorInfo { + /// Machine-readable error code (e.g., `"pipeline_error"`). pub code: Option, + /// Human-readable error description. pub message: String, } +/// Internal events that drive the task state machine transitions. #[derive(Debug, Clone, Copy)] enum TaskTransition { StartProcessing, diff --git a/common/src/storage/types/message.rs b/common/src/storage/types/message.rs index f478f5a..86cdefa 100644 --- a/common/src/storage/types/message.rs +++ b/common/src/storage/types/message.rs @@ -60,7 +60,11 @@ impl fmt::Display for Message { // helper function to format a vector of messages #[must_use] pub fn format_history(history: &[Message]) -> String { - let mut out = String::new(); + let estimated: usize = history + .iter() + .map(|m| m.content.len() + 10) + .sum(); + let mut out = String::with_capacity(estimated); for (i, msg) in history.iter().enumerate() { if i > 0 { out.push('\n'); diff --git a/common/src/storage/types/mod.rs b/common/src/storage/types/mod.rs index 751c189..02abb1f 100644 --- a/common/src/storage/types/mod.rs +++ b/common/src/storage/types/mod.rs @@ -20,7 +20,7 @@ pub mod user; pub trait StoredObject: Serialize + for<'de> Deserialize<'de> { fn table_name() -> &'static str; - fn get_id(&self) -> &str; + fn id(&self) -> &str; } #[macro_export] @@ -52,7 +52,7 @@ macro_rules! stored_object { $table } - fn get_id(&self) -> &str { + fn id(&self) -> &str { &self.id } } diff --git a/common/src/storage/types/system_settings.rs b/common/src/storage/types/system_settings.rs index 1e11224..ae51e50 100644 --- a/common/src/storage/types/system_settings.rs +++ b/common/src/storage/types/system_settings.rs @@ -28,7 +28,7 @@ impl StoredObject for SystemSettings { "system_settings" } - fn get_id(&self) -> &str { + fn id(&self) -> &str { &self.id } } diff --git a/common/src/utils/ingest_limits.rs b/common/src/utils/ingest_limits.rs index 5d54568..398a0bc 100644 --- a/common/src/utils/ingest_limits.rs +++ b/common/src/utils/ingest_limits.rs @@ -1,8 +1,11 @@ use super::config::AppConfig; +/// Errors raised when validating ingestion payloads against configured limits. #[derive(Debug, Clone, PartialEq, Eq)] pub enum IngestValidationError { + /// The payload exceeds a configured size limit (content, context, or category). PayloadTooLarge(String), + /// The request violates a non-size constraint (e.g., too many files). BadRequest(String), } diff --git a/evaluations/src/corpus/orchestrator.rs b/evaluations/src/corpus/orchestrator.rs index 29a149f..d3e5e80 100644 --- a/evaluations/src/corpus/orchestrator.rs +++ b/evaluations/src/corpus/orchestrator.rs @@ -340,7 +340,7 @@ pub async fn ensure_corpus( corpus_questions.push(CorpusQuestion { question_id: case.question.id.clone(), paragraph_id: case.paragraph.id.clone(), - text_content_id: record.shard.text_content.get_id().to_string(), + text_content_id: record.shard.text_content.id().to_string(), question_text: case.question.question.clone(), answers: case.question.answers.clone(), is_impossible: case.question.is_impossible, diff --git a/evaluations/src/corpus/store.rs b/evaluations/src/corpus/store.rs index 0c46c98..e8bc6a3 100644 --- a/evaluations/src/corpus/store.rs +++ b/evaluations/src/corpus/store.rs @@ -575,7 +575,7 @@ fn validate_answers( if chunk_text.contains(&needle) || (!needle_norm.is_empty() && chunk_norm.contains(&needle_norm)) { - matches.insert(chunk.chunk.get_id().to_string()); + matches.insert(chunk.chunk.id().to_string()); found_any = true; } } diff --git a/evaluations/src/namespace.rs b/evaluations/src/namespace.rs index aa673e6..ff39a98 100644 --- a/evaluations/src/namespace.rs +++ b/evaluations/src/namespace.rs @@ -216,7 +216,7 @@ pub(crate) async fn ensure_eval_user(db: &SurrealDbClient) -> Result { theme: Theme::System, }; - if let Some(existing) = db.get_item::(user.get_id()).await? { + if let Some(existing) = db.get_item::(user.id()).await? { return Ok(existing); } diff --git a/evaluations/src/pipeline/stages/run_queries.rs b/evaluations/src/pipeline/stages/run_queries.rs index be79df3..079b1b9 100644 --- a/evaluations/src/pipeline/stages/run_queries.rs +++ b/evaluations/src/pipeline/stages/run_queries.rs @@ -247,7 +247,7 @@ pub(crate) async fn run_queries( || candidate .chunks .iter() - .any(|chunk| expected_chunk_ids_set.contains(&chunk.chunk.get_id())) + .any(|chunk| expected_chunk_ids_set.contains(&chunk.chunk.id())) } else { true }; diff --git a/evaluations/src/types.rs b/evaluations/src/types.rs index 481b2ae..971a990 100644 --- a/evaluations/src/types.rs +++ b/evaluations/src/types.rs @@ -194,7 +194,7 @@ impl EvaluationCandidate { fn from_entity(entity: RetrievedEntity) -> Self { let entity_category = Some(format!("{:?}", entity.entity.entity_type)); Self { - entity_id: entity.entity.get_id().to_string(), + entity_id: entity.entity.id().to_string(), source_id: entity.entity.source_id.clone(), entity_name: entity.entity.name.clone(), entity_description: Some(entity.entity.description.clone()), @@ -207,7 +207,7 @@ impl EvaluationCandidate { fn from_chunk(chunk: RetrievedChunk) -> Self { let snippet = chunk_snippet(&chunk.chunk.chunk); Self { - entity_id: chunk.chunk.get_id().to_string(), + entity_id: chunk.chunk.id().to_string(), source_id: chunk.chunk.source_id.clone(), entity_name: chunk.chunk.source_id.clone(), entity_description: Some(snippet), @@ -423,11 +423,11 @@ pub fn build_case_diagnostics( let mut chunk_entries = Vec::new(); for chunk in &candidate.chunks { let contains_answer = text_contains_answer(&chunk.chunk.chunk, answers_lower); - let expected_chunk = expected_set.contains(chunk.chunk.get_id()); - seen_chunks.insert(chunk.chunk.get_id().to_string()); - attached_chunk_ids.push(chunk.chunk.get_id().to_string()); + let expected_chunk = expected_set.contains(chunk.chunk.id()); + seen_chunks.insert(chunk.chunk.id().to_string()); + attached_chunk_ids.push(chunk.chunk.id().to_string()); chunk_entries.push(ChunkDiagnosticsEntry { - chunk_id: chunk.chunk.get_id().to_string(), + chunk_id: chunk.chunk.id().to_string(), score: chunk.score, contains_answer, expected_chunk, diff --git a/ingestion-pipeline/src/pipeline/services.rs b/ingestion-pipeline/src/pipeline/services.rs index 1003e0f..42ff717 100644 --- a/ingestion-pipeline/src/pipeline/services.rs +++ b/ingestion-pipeline/src/pipeline/services.rs @@ -240,7 +240,7 @@ impl PipelineServices for DefaultPipelineServices { ) -> Result<(Vec, Vec), AppError> { analysis .to_database_entities( - content.get_id(), + content.id(), &content.user_id, &self.openai_client, &self.db, @@ -271,7 +271,7 @@ impl PipelineServices for DefaultPipelineServices { .await .map_err(|e| AppError::InternalError(format!("FastEmbed embedding for chunk failed: {e}")))?; let chunk_struct = TextChunk::new( - content.get_id().to_string(), + content.id().to_string(), chunk_text, content.user_id.clone(), ); diff --git a/retrieval-pipeline/src/scoring.rs b/retrieval-pipeline/src/scoring.rs index 5b463e8..b29da7d 100644 --- a/retrieval-pipeline/src/scoring.rs +++ b/retrieval-pipeline/src/scoring.rs @@ -188,7 +188,7 @@ pub fn merge_scored_by_id( T: StoredObject + Clone, { for scored in incoming { - let id = scored.item.get_id().to_owned(); + let id = scored.item.id().to_owned(); target .entry(id) .and_modify(|existing| { @@ -218,7 +218,7 @@ where b.fused .partial_cmp(&a.fused) .unwrap_or(Ordering::Equal) - .then_with(|| a.item.get_id().cmp(b.item.get_id())) + .then_with(|| a.item.id().cmp(b.item.id())) }); } @@ -250,11 +250,11 @@ where b_score .partial_cmp(&a_score) .unwrap_or(Ordering::Equal) - .then_with(|| a.item.get_id().cmp(b.item.get_id())) + .then_with(|| a.item.id().cmp(b.item.id())) }); for (rank, candidate) in vector_ranked.into_iter().enumerate() { - let id = candidate.item.get_id().to_owned(); + let id = candidate.item.id().to_owned(); let entry = merged .entry(id.clone()) .or_insert_with(|| Scored::new(candidate.item.clone())); @@ -280,11 +280,11 @@ where b_score .partial_cmp(&a_score) .unwrap_or(Ordering::Equal) - .then_with(|| a.item.get_id().cmp(b.item.get_id())) + .then_with(|| a.item.id().cmp(b.item.id())) }); for (rank, candidate) in fts_ranked.into_iter().enumerate() { - let id = candidate.item.get_id().to_owned(); + let id = candidate.item.id().to_owned(); let entry = merged .entry(id.clone()) .or_insert_with(|| Scored::new(candidate.item.clone()));