chore: rename get_id to id, add doc comments, pre-allocate format_history

This commit is contained in:
Per Stark
2026-05-27 18:06:16 +02:00
parent 224a7db451
commit 30bb59f243
16 changed files with 52 additions and 25 deletions
+2 -2
View File
@@ -143,7 +143,7 @@ impl SurrealDbClient {
T: StoredObject + Send + Sync + 'static,
{
self.client
.create((T::table_name(), item.get_id()))
.create((T::table_name(), item.id()))
.content(item)
.await
}
@@ -159,7 +159,7 @@ impl SurrealDbClient {
where
T: StoredObject + Send + Sync + 'static,
{
let id = item.get_id().to_string();
let id = item.id().to_string();
self.client
.upsert((T::table_name(), id))
.content(item)
+8
View File
@@ -581,14 +581,22 @@ async fn poll_index_build_status(
Ok(())
}
/// Snapshot of an index build progress as reported by SurrealDB's `INFO FOR INDEX`.
#[derive(Debug, PartialEq)]
struct IndexBuildSnapshot {
/// Current build status string (e.g., `"indexing"`, `"ready"`, `"error"`).
status: String,
/// Number of rows present when the build started.
initial: u64,
/// Number of rows still pending processing.
pending: u64,
/// Number of rows updated since the build started.
updated: u64,
/// Total rows processed so far (`initial + updated`).
processed: u64,
/// Total rows expected (from `SELECT count()` before the build), if available.
total_rows: Option<u64>,
/// Progress as a percentage of `processed / total_rows`, if `total_rows` is known.
progress_pct: Option<f64>,
}
+1 -1
View File
@@ -16,7 +16,7 @@ impl StoredObject for Analytics {
"analytics"
}
fn get_id(&self) -> &str {
fn id(&self) -> &str {
&self.id
}
}
+8
View File
@@ -19,26 +19,34 @@ use crate::{
stored_object,
};
/// Errors that can occur during file storage operations.
#[derive(Error, Debug)]
pub enum FileError {
/// No file record found with the given UUID.
#[error("file not found for uuid: {0}")]
FileNotFound(String),
/// Underlying I/O operation failed.
#[error("io error occurred: {0}")]
Io(#[from] std::io::Error),
/// A file with the same SHA-256 hash already exists.
#[error("duplicate file detected with sha256: {0}")]
DuplicateFile(String),
/// Database operation on the file record failed.
#[error("surrealdb error: {0}")]
SurrealError(#[from] surrealdb::Error),
/// Failed to persist the temporary file to its final location.
#[error("failed to persist file: {0}")]
PersistError(#[from] tempfile::PersistError),
/// Upload metadata did not include a file name.
#[error("file name missing in metadata")]
MissingFileName,
/// The underlying object store operation failed.
#[error("object store error: {0}")]
ObjectStore(#[from] ObjectStoreError),
}
@@ -68,12 +68,16 @@ impl TaskState {
}
}
/// Information about an error that occurred during task processing.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq, Default)]
pub struct TaskErrorInfo {
/// Machine-readable error code (e.g., `"pipeline_error"`).
pub code: Option<String>,
/// Human-readable error description.
pub message: String,
}
/// Internal events that drive the task state machine transitions.
#[derive(Debug, Clone, Copy)]
enum TaskTransition {
StartProcessing,
+5 -1
View File
@@ -60,7 +60,11 @@ impl fmt::Display for Message {
// helper function to format a vector of messages
#[must_use]
pub fn format_history(history: &[Message]) -> String {
let mut out = String::new();
let estimated: usize = history
.iter()
.map(|m| m.content.len() + 10)
.sum();
let mut out = String::with_capacity(estimated);
for (i, msg) in history.iter().enumerate() {
if i > 0 {
out.push('\n');
+2 -2
View File
@@ -20,7 +20,7 @@ pub mod user;
pub trait StoredObject: Serialize + for<'de> Deserialize<'de> {
fn table_name() -> &'static str;
fn get_id(&self) -> &str;
fn id(&self) -> &str;
}
#[macro_export]
@@ -52,7 +52,7 @@ macro_rules! stored_object {
$table
}
fn get_id(&self) -> &str {
fn id(&self) -> &str {
&self.id
}
}
+1 -1
View File
@@ -28,7 +28,7 @@ impl StoredObject for SystemSettings {
"system_settings"
}
fn get_id(&self) -> &str {
fn id(&self) -> &str {
&self.id
}
}
+3
View File
@@ -1,8 +1,11 @@
use super::config::AppConfig;
/// Errors raised when validating ingestion payloads against configured limits.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IngestValidationError {
/// The payload exceeds a configured size limit (content, context, or category).
PayloadTooLarge(String),
/// The request violates a non-size constraint (e.g., too many files).
BadRequest(String),
}
+1 -1
View File
@@ -340,7 +340,7 @@ pub async fn ensure_corpus(
corpus_questions.push(CorpusQuestion {
question_id: case.question.id.clone(),
paragraph_id: case.paragraph.id.clone(),
text_content_id: record.shard.text_content.get_id().to_string(),
text_content_id: record.shard.text_content.id().to_string(),
question_text: case.question.question.clone(),
answers: case.question.answers.clone(),
is_impossible: case.question.is_impossible,
+1 -1
View File
@@ -575,7 +575,7 @@ fn validate_answers(
if chunk_text.contains(&needle)
|| (!needle_norm.is_empty() && chunk_norm.contains(&needle_norm))
{
matches.insert(chunk.chunk.get_id().to_string());
matches.insert(chunk.chunk.id().to_string());
found_any = true;
}
}
+1 -1
View File
@@ -216,7 +216,7 @@ pub(crate) async fn ensure_eval_user(db: &SurrealDbClient) -> Result<User> {
theme: Theme::System,
};
if let Some(existing) = db.get_item::<User>(user.get_id()).await? {
if let Some(existing) = db.get_item::<User>(user.id()).await? {
return Ok(existing);
}
@@ -247,7 +247,7 @@ pub(crate) async fn run_queries(
|| candidate
.chunks
.iter()
.any(|chunk| expected_chunk_ids_set.contains(&chunk.chunk.get_id()))
.any(|chunk| expected_chunk_ids_set.contains(&chunk.chunk.id()))
} else {
true
};
+6 -6
View File
@@ -194,7 +194,7 @@ impl EvaluationCandidate {
fn from_entity(entity: RetrievedEntity) -> Self {
let entity_category = Some(format!("{:?}", entity.entity.entity_type));
Self {
entity_id: entity.entity.get_id().to_string(),
entity_id: entity.entity.id().to_string(),
source_id: entity.entity.source_id.clone(),
entity_name: entity.entity.name.clone(),
entity_description: Some(entity.entity.description.clone()),
@@ -207,7 +207,7 @@ impl EvaluationCandidate {
fn from_chunk(chunk: RetrievedChunk) -> Self {
let snippet = chunk_snippet(&chunk.chunk.chunk);
Self {
entity_id: chunk.chunk.get_id().to_string(),
entity_id: chunk.chunk.id().to_string(),
source_id: chunk.chunk.source_id.clone(),
entity_name: chunk.chunk.source_id.clone(),
entity_description: Some(snippet),
@@ -423,11 +423,11 @@ pub fn build_case_diagnostics(
let mut chunk_entries = Vec::new();
for chunk in &candidate.chunks {
let contains_answer = text_contains_answer(&chunk.chunk.chunk, answers_lower);
let expected_chunk = expected_set.contains(chunk.chunk.get_id());
seen_chunks.insert(chunk.chunk.get_id().to_string());
attached_chunk_ids.push(chunk.chunk.get_id().to_string());
let expected_chunk = expected_set.contains(chunk.chunk.id());
seen_chunks.insert(chunk.chunk.id().to_string());
attached_chunk_ids.push(chunk.chunk.id().to_string());
chunk_entries.push(ChunkDiagnosticsEntry {
chunk_id: chunk.chunk.get_id().to_string(),
chunk_id: chunk.chunk.id().to_string(),
score: chunk.score,
contains_answer,
expected_chunk,
+2 -2
View File
@@ -240,7 +240,7 @@ impl PipelineServices for DefaultPipelineServices {
) -> Result<(Vec<EmbeddedKnowledgeEntity>, Vec<KnowledgeRelationship>), AppError> {
analysis
.to_database_entities(
content.get_id(),
content.id(),
&content.user_id,
&self.openai_client,
&self.db,
@@ -271,7 +271,7 @@ impl PipelineServices for DefaultPipelineServices {
.await
.map_err(|e| AppError::InternalError(format!("FastEmbed embedding for chunk failed: {e}")))?;
let chunk_struct = TextChunk::new(
content.get_id().to_string(),
content.id().to_string(),
chunk_text,
content.user_id.clone(),
);
+6 -6
View File
@@ -188,7 +188,7 @@ pub fn merge_scored_by_id<T, S: std::hash::BuildHasher>(
T: StoredObject + Clone,
{
for scored in incoming {
let id = scored.item.get_id().to_owned();
let id = scored.item.id().to_owned();
target
.entry(id)
.and_modify(|existing| {
@@ -218,7 +218,7 @@ where
b.fused
.partial_cmp(&a.fused)
.unwrap_or(Ordering::Equal)
.then_with(|| a.item.get_id().cmp(b.item.get_id()))
.then_with(|| a.item.id().cmp(b.item.id()))
});
}
@@ -250,11 +250,11 @@ where
b_score
.partial_cmp(&a_score)
.unwrap_or(Ordering::Equal)
.then_with(|| a.item.get_id().cmp(b.item.get_id()))
.then_with(|| a.item.id().cmp(b.item.id()))
});
for (rank, candidate) in vector_ranked.into_iter().enumerate() {
let id = candidate.item.get_id().to_owned();
let id = candidate.item.id().to_owned();
let entry = merged
.entry(id.clone())
.or_insert_with(|| Scored::new(candidate.item.clone()));
@@ -280,11 +280,11 @@ where
b_score
.partial_cmp(&a_score)
.unwrap_or(Ordering::Equal)
.then_with(|| a.item.get_id().cmp(b.item.get_id()))
.then_with(|| a.item.id().cmp(b.item.id()))
});
for (rank, candidate) in fts_ranked.into_iter().enumerate() {
let id = candidate.item.get_id().to_owned();
let id = candidate.item.id().to_owned();
let entry = merged
.entry(id.clone())
.or_insert_with(|| Scored::new(candidate.item.clone()));