chore: rename get_id to id, add doc comments, pre-allocate format_history

This commit is contained in:
Per Stark
2026-05-27 18:06:16 +02:00
parent 224a7db451
commit 30bb59f243
16 changed files with 52 additions and 25 deletions
+2 -2
View File
@@ -143,7 +143,7 @@ impl SurrealDbClient {
T: StoredObject + Send + Sync + 'static, T: StoredObject + Send + Sync + 'static,
{ {
self.client self.client
.create((T::table_name(), item.get_id())) .create((T::table_name(), item.id()))
.content(item) .content(item)
.await .await
} }
@@ -159,7 +159,7 @@ impl SurrealDbClient {
where where
T: StoredObject + Send + Sync + 'static, T: StoredObject + Send + Sync + 'static,
{ {
let id = item.get_id().to_string(); let id = item.id().to_string();
self.client self.client
.upsert((T::table_name(), id)) .upsert((T::table_name(), id))
.content(item) .content(item)
+8
View File
@@ -581,14 +581,22 @@ async fn poll_index_build_status(
Ok(()) Ok(())
} }
/// Snapshot of an index build progress as reported by SurrealDB's `INFO FOR INDEX`.
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
struct IndexBuildSnapshot { struct IndexBuildSnapshot {
/// Current build status string (e.g., `"indexing"`, `"ready"`, `"error"`).
status: String, status: String,
/// Number of rows present when the build started.
initial: u64, initial: u64,
/// Number of rows still pending processing.
pending: u64, pending: u64,
/// Number of rows updated since the build started.
updated: u64, updated: u64,
/// Total rows processed so far (`initial + updated`).
processed: u64, processed: u64,
/// Total rows expected (from `SELECT count()` before the build), if available.
total_rows: Option<u64>, total_rows: Option<u64>,
/// Progress as a percentage of `processed / total_rows`, if `total_rows` is known.
progress_pct: Option<f64>, progress_pct: Option<f64>,
} }
+1 -1
View File
@@ -16,7 +16,7 @@ impl StoredObject for Analytics {
"analytics" "analytics"
} }
fn get_id(&self) -> &str { fn id(&self) -> &str {
&self.id &self.id
} }
} }
+8
View File
@@ -19,26 +19,34 @@ use crate::{
stored_object, stored_object,
}; };
/// Errors that can occur during file storage operations.
#[derive(Error, Debug)] #[derive(Error, Debug)]
pub enum FileError { pub enum FileError {
/// No file record found with the given UUID.
#[error("file not found for uuid: {0}")] #[error("file not found for uuid: {0}")]
FileNotFound(String), FileNotFound(String),
/// Underlying I/O operation failed.
#[error("io error occurred: {0}")] #[error("io error occurred: {0}")]
Io(#[from] std::io::Error), Io(#[from] std::io::Error),
/// A file with the same SHA-256 hash already exists.
#[error("duplicate file detected with sha256: {0}")] #[error("duplicate file detected with sha256: {0}")]
DuplicateFile(String), DuplicateFile(String),
/// Database operation on the file record failed.
#[error("surrealdb error: {0}")] #[error("surrealdb error: {0}")]
SurrealError(#[from] surrealdb::Error), SurrealError(#[from] surrealdb::Error),
/// Failed to persist the temporary file to its final location.
#[error("failed to persist file: {0}")] #[error("failed to persist file: {0}")]
PersistError(#[from] tempfile::PersistError), PersistError(#[from] tempfile::PersistError),
/// Upload metadata did not include a file name.
#[error("file name missing in metadata")] #[error("file name missing in metadata")]
MissingFileName, MissingFileName,
/// The underlying object store operation failed.
#[error("object store error: {0}")] #[error("object store error: {0}")]
ObjectStore(#[from] ObjectStoreError), ObjectStore(#[from] ObjectStoreError),
} }
@@ -68,12 +68,16 @@ impl TaskState {
} }
} }
/// Information about an error that occurred during task processing.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq, Default)] #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq, Default)]
pub struct TaskErrorInfo { pub struct TaskErrorInfo {
/// Machine-readable error code (e.g., `"pipeline_error"`).
pub code: Option<String>, pub code: Option<String>,
/// Human-readable error description.
pub message: String, pub message: String,
} }
/// Internal events that drive the task state machine transitions.
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
enum TaskTransition { enum TaskTransition {
StartProcessing, StartProcessing,
+5 -1
View File
@@ -60,7 +60,11 @@ impl fmt::Display for Message {
// helper function to format a vector of messages // helper function to format a vector of messages
#[must_use] #[must_use]
pub fn format_history(history: &[Message]) -> String { pub fn format_history(history: &[Message]) -> String {
let mut out = String::new(); let estimated: usize = history
.iter()
.map(|m| m.content.len() + 10)
.sum();
let mut out = String::with_capacity(estimated);
for (i, msg) in history.iter().enumerate() { for (i, msg) in history.iter().enumerate() {
if i > 0 { if i > 0 {
out.push('\n'); out.push('\n');
+2 -2
View File
@@ -20,7 +20,7 @@ pub mod user;
pub trait StoredObject: Serialize + for<'de> Deserialize<'de> { pub trait StoredObject: Serialize + for<'de> Deserialize<'de> {
fn table_name() -> &'static str; fn table_name() -> &'static str;
fn get_id(&self) -> &str; fn id(&self) -> &str;
} }
#[macro_export] #[macro_export]
@@ -52,7 +52,7 @@ macro_rules! stored_object {
$table $table
} }
fn get_id(&self) -> &str { fn id(&self) -> &str {
&self.id &self.id
} }
} }
+1 -1
View File
@@ -28,7 +28,7 @@ impl StoredObject for SystemSettings {
"system_settings" "system_settings"
} }
fn get_id(&self) -> &str { fn id(&self) -> &str {
&self.id &self.id
} }
} }
+3
View File
@@ -1,8 +1,11 @@
use super::config::AppConfig; use super::config::AppConfig;
/// Errors raised when validating ingestion payloads against configured limits.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum IngestValidationError { pub enum IngestValidationError {
/// The payload exceeds a configured size limit (content, context, or category).
PayloadTooLarge(String), PayloadTooLarge(String),
/// The request violates a non-size constraint (e.g., too many files).
BadRequest(String), BadRequest(String),
} }
+1 -1
View File
@@ -340,7 +340,7 @@ pub async fn ensure_corpus(
corpus_questions.push(CorpusQuestion { corpus_questions.push(CorpusQuestion {
question_id: case.question.id.clone(), question_id: case.question.id.clone(),
paragraph_id: case.paragraph.id.clone(), paragraph_id: case.paragraph.id.clone(),
text_content_id: record.shard.text_content.get_id().to_string(), text_content_id: record.shard.text_content.id().to_string(),
question_text: case.question.question.clone(), question_text: case.question.question.clone(),
answers: case.question.answers.clone(), answers: case.question.answers.clone(),
is_impossible: case.question.is_impossible, is_impossible: case.question.is_impossible,
+1 -1
View File
@@ -575,7 +575,7 @@ fn validate_answers(
if chunk_text.contains(&needle) if chunk_text.contains(&needle)
|| (!needle_norm.is_empty() && chunk_norm.contains(&needle_norm)) || (!needle_norm.is_empty() && chunk_norm.contains(&needle_norm))
{ {
matches.insert(chunk.chunk.get_id().to_string()); matches.insert(chunk.chunk.id().to_string());
found_any = true; found_any = true;
} }
} }
+1 -1
View File
@@ -216,7 +216,7 @@ pub(crate) async fn ensure_eval_user(db: &SurrealDbClient) -> Result<User> {
theme: Theme::System, theme: Theme::System,
}; };
if let Some(existing) = db.get_item::<User>(user.get_id()).await? { if let Some(existing) = db.get_item::<User>(user.id()).await? {
return Ok(existing); return Ok(existing);
} }
@@ -247,7 +247,7 @@ pub(crate) async fn run_queries(
|| candidate || candidate
.chunks .chunks
.iter() .iter()
.any(|chunk| expected_chunk_ids_set.contains(&chunk.chunk.get_id())) .any(|chunk| expected_chunk_ids_set.contains(&chunk.chunk.id()))
} else { } else {
true true
}; };
+6 -6
View File
@@ -194,7 +194,7 @@ impl EvaluationCandidate {
fn from_entity(entity: RetrievedEntity) -> Self { fn from_entity(entity: RetrievedEntity) -> Self {
let entity_category = Some(format!("{:?}", entity.entity.entity_type)); let entity_category = Some(format!("{:?}", entity.entity.entity_type));
Self { Self {
entity_id: entity.entity.get_id().to_string(), entity_id: entity.entity.id().to_string(),
source_id: entity.entity.source_id.clone(), source_id: entity.entity.source_id.clone(),
entity_name: entity.entity.name.clone(), entity_name: entity.entity.name.clone(),
entity_description: Some(entity.entity.description.clone()), entity_description: Some(entity.entity.description.clone()),
@@ -207,7 +207,7 @@ impl EvaluationCandidate {
fn from_chunk(chunk: RetrievedChunk) -> Self { fn from_chunk(chunk: RetrievedChunk) -> Self {
let snippet = chunk_snippet(&chunk.chunk.chunk); let snippet = chunk_snippet(&chunk.chunk.chunk);
Self { Self {
entity_id: chunk.chunk.get_id().to_string(), entity_id: chunk.chunk.id().to_string(),
source_id: chunk.chunk.source_id.clone(), source_id: chunk.chunk.source_id.clone(),
entity_name: chunk.chunk.source_id.clone(), entity_name: chunk.chunk.source_id.clone(),
entity_description: Some(snippet), entity_description: Some(snippet),
@@ -423,11 +423,11 @@ pub fn build_case_diagnostics(
let mut chunk_entries = Vec::new(); let mut chunk_entries = Vec::new();
for chunk in &candidate.chunks { for chunk in &candidate.chunks {
let contains_answer = text_contains_answer(&chunk.chunk.chunk, answers_lower); let contains_answer = text_contains_answer(&chunk.chunk.chunk, answers_lower);
let expected_chunk = expected_set.contains(chunk.chunk.get_id()); let expected_chunk = expected_set.contains(chunk.chunk.id());
seen_chunks.insert(chunk.chunk.get_id().to_string()); seen_chunks.insert(chunk.chunk.id().to_string());
attached_chunk_ids.push(chunk.chunk.get_id().to_string()); attached_chunk_ids.push(chunk.chunk.id().to_string());
chunk_entries.push(ChunkDiagnosticsEntry { chunk_entries.push(ChunkDiagnosticsEntry {
chunk_id: chunk.chunk.get_id().to_string(), chunk_id: chunk.chunk.id().to_string(),
score: chunk.score, score: chunk.score,
contains_answer, contains_answer,
expected_chunk, expected_chunk,
+2 -2
View File
@@ -240,7 +240,7 @@ impl PipelineServices for DefaultPipelineServices {
) -> Result<(Vec<EmbeddedKnowledgeEntity>, Vec<KnowledgeRelationship>), AppError> { ) -> Result<(Vec<EmbeddedKnowledgeEntity>, Vec<KnowledgeRelationship>), AppError> {
analysis analysis
.to_database_entities( .to_database_entities(
content.get_id(), content.id(),
&content.user_id, &content.user_id,
&self.openai_client, &self.openai_client,
&self.db, &self.db,
@@ -271,7 +271,7 @@ impl PipelineServices for DefaultPipelineServices {
.await .await
.map_err(|e| AppError::InternalError(format!("FastEmbed embedding for chunk failed: {e}")))?; .map_err(|e| AppError::InternalError(format!("FastEmbed embedding for chunk failed: {e}")))?;
let chunk_struct = TextChunk::new( let chunk_struct = TextChunk::new(
content.get_id().to_string(), content.id().to_string(),
chunk_text, chunk_text,
content.user_id.clone(), content.user_id.clone(),
); );
+6 -6
View File
@@ -188,7 +188,7 @@ pub fn merge_scored_by_id<T, S: std::hash::BuildHasher>(
T: StoredObject + Clone, T: StoredObject + Clone,
{ {
for scored in incoming { for scored in incoming {
let id = scored.item.get_id().to_owned(); let id = scored.item.id().to_owned();
target target
.entry(id) .entry(id)
.and_modify(|existing| { .and_modify(|existing| {
@@ -218,7 +218,7 @@ where
b.fused b.fused
.partial_cmp(&a.fused) .partial_cmp(&a.fused)
.unwrap_or(Ordering::Equal) .unwrap_or(Ordering::Equal)
.then_with(|| a.item.get_id().cmp(b.item.get_id())) .then_with(|| a.item.id().cmp(b.item.id()))
}); });
} }
@@ -250,11 +250,11 @@ where
b_score b_score
.partial_cmp(&a_score) .partial_cmp(&a_score)
.unwrap_or(Ordering::Equal) .unwrap_or(Ordering::Equal)
.then_with(|| a.item.get_id().cmp(b.item.get_id())) .then_with(|| a.item.id().cmp(b.item.id()))
}); });
for (rank, candidate) in vector_ranked.into_iter().enumerate() { for (rank, candidate) in vector_ranked.into_iter().enumerate() {
let id = candidate.item.get_id().to_owned(); let id = candidate.item.id().to_owned();
let entry = merged let entry = merged
.entry(id.clone()) .entry(id.clone())
.or_insert_with(|| Scored::new(candidate.item.clone())); .or_insert_with(|| Scored::new(candidate.item.clone()));
@@ -280,11 +280,11 @@ where
b_score b_score
.partial_cmp(&a_score) .partial_cmp(&a_score)
.unwrap_or(Ordering::Equal) .unwrap_or(Ordering::Equal)
.then_with(|| a.item.get_id().cmp(b.item.get_id())) .then_with(|| a.item.id().cmp(b.item.id()))
}); });
for (rank, candidate) in fts_ranked.into_iter().enumerate() { for (rank, candidate) in fts_ranked.into_iter().enumerate() {
let id = candidate.item.get_id().to_owned(); let id = candidate.item.id().to_owned();
let entry = merged let entry = merged
.entry(id.clone()) .entry(id.clone())
.or_insert_with(|| Scored::new(candidate.item.clone())); .or_insert_with(|| Scored::new(candidate.item.clone()));