mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-29 19:00:51 +02:00
chore: rename get_id to id, add doc comments, pre-allocate format_history
This commit is contained in:
@@ -143,7 +143,7 @@ impl SurrealDbClient {
|
||||
T: StoredObject + Send + Sync + 'static,
|
||||
{
|
||||
self.client
|
||||
.create((T::table_name(), item.get_id()))
|
||||
.create((T::table_name(), item.id()))
|
||||
.content(item)
|
||||
.await
|
||||
}
|
||||
@@ -159,7 +159,7 @@ impl SurrealDbClient {
|
||||
where
|
||||
T: StoredObject + Send + Sync + 'static,
|
||||
{
|
||||
let id = item.get_id().to_string();
|
||||
let id = item.id().to_string();
|
||||
self.client
|
||||
.upsert((T::table_name(), id))
|
||||
.content(item)
|
||||
|
||||
@@ -581,14 +581,22 @@ async fn poll_index_build_status(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Snapshot of an index build progress as reported by SurrealDB's `INFO FOR INDEX`.
|
||||
#[derive(Debug, PartialEq)]
|
||||
struct IndexBuildSnapshot {
|
||||
/// Current build status string (e.g., `"indexing"`, `"ready"`, `"error"`).
|
||||
status: String,
|
||||
/// Number of rows present when the build started.
|
||||
initial: u64,
|
||||
/// Number of rows still pending processing.
|
||||
pending: u64,
|
||||
/// Number of rows updated since the build started.
|
||||
updated: u64,
|
||||
/// Total rows processed so far (`initial + updated`).
|
||||
processed: u64,
|
||||
/// Total rows expected (from `SELECT count()` before the build), if available.
|
||||
total_rows: Option<u64>,
|
||||
/// Progress as a percentage of `processed / total_rows`, if `total_rows` is known.
|
||||
progress_pct: Option<f64>,
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ impl StoredObject for Analytics {
|
||||
"analytics"
|
||||
}
|
||||
|
||||
fn get_id(&self) -> &str {
|
||||
fn id(&self) -> &str {
|
||||
&self.id
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,26 +19,34 @@ use crate::{
|
||||
stored_object,
|
||||
};
|
||||
|
||||
/// Errors that can occur during file storage operations.
|
||||
#[derive(Error, Debug)]
|
||||
pub enum FileError {
|
||||
/// No file record found with the given UUID.
|
||||
#[error("file not found for uuid: {0}")]
|
||||
FileNotFound(String),
|
||||
|
||||
/// Underlying I/O operation failed.
|
||||
#[error("io error occurred: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
/// A file with the same SHA-256 hash already exists.
|
||||
#[error("duplicate file detected with sha256: {0}")]
|
||||
DuplicateFile(String),
|
||||
|
||||
/// Database operation on the file record failed.
|
||||
#[error("surrealdb error: {0}")]
|
||||
SurrealError(#[from] surrealdb::Error),
|
||||
|
||||
/// Failed to persist the temporary file to its final location.
|
||||
#[error("failed to persist file: {0}")]
|
||||
PersistError(#[from] tempfile::PersistError),
|
||||
|
||||
/// Upload metadata did not include a file name.
|
||||
#[error("file name missing in metadata")]
|
||||
MissingFileName,
|
||||
|
||||
/// The underlying object store operation failed.
|
||||
#[error("object store error: {0}")]
|
||||
ObjectStore(#[from] ObjectStoreError),
|
||||
}
|
||||
|
||||
@@ -68,12 +68,16 @@ impl TaskState {
|
||||
}
|
||||
}
|
||||
|
||||
/// Information about an error that occurred during task processing.
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq, Default)]
|
||||
pub struct TaskErrorInfo {
|
||||
/// Machine-readable error code (e.g., `"pipeline_error"`).
|
||||
pub code: Option<String>,
|
||||
/// Human-readable error description.
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
/// Internal events that drive the task state machine transitions.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
enum TaskTransition {
|
||||
StartProcessing,
|
||||
|
||||
@@ -60,7 +60,11 @@ impl fmt::Display for Message {
|
||||
// helper function to format a vector of messages
|
||||
#[must_use]
|
||||
pub fn format_history(history: &[Message]) -> String {
|
||||
let mut out = String::new();
|
||||
let estimated: usize = history
|
||||
.iter()
|
||||
.map(|m| m.content.len() + 10)
|
||||
.sum();
|
||||
let mut out = String::with_capacity(estimated);
|
||||
for (i, msg) in history.iter().enumerate() {
|
||||
if i > 0 {
|
||||
out.push('\n');
|
||||
|
||||
@@ -20,7 +20,7 @@ pub mod user;
|
||||
|
||||
pub trait StoredObject: Serialize + for<'de> Deserialize<'de> {
|
||||
fn table_name() -> &'static str;
|
||||
fn get_id(&self) -> &str;
|
||||
fn id(&self) -> &str;
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
@@ -52,7 +52,7 @@ macro_rules! stored_object {
|
||||
$table
|
||||
}
|
||||
|
||||
fn get_id(&self) -> &str {
|
||||
fn id(&self) -> &str {
|
||||
&self.id
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ impl StoredObject for SystemSettings {
|
||||
"system_settings"
|
||||
}
|
||||
|
||||
fn get_id(&self) -> &str {
|
||||
fn id(&self) -> &str {
|
||||
&self.id
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
use super::config::AppConfig;
|
||||
|
||||
/// Errors raised when validating ingestion payloads against configured limits.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum IngestValidationError {
|
||||
/// The payload exceeds a configured size limit (content, context, or category).
|
||||
PayloadTooLarge(String),
|
||||
/// The request violates a non-size constraint (e.g., too many files).
|
||||
BadRequest(String),
|
||||
}
|
||||
|
||||
|
||||
@@ -340,7 +340,7 @@ pub async fn ensure_corpus(
|
||||
corpus_questions.push(CorpusQuestion {
|
||||
question_id: case.question.id.clone(),
|
||||
paragraph_id: case.paragraph.id.clone(),
|
||||
text_content_id: record.shard.text_content.get_id().to_string(),
|
||||
text_content_id: record.shard.text_content.id().to_string(),
|
||||
question_text: case.question.question.clone(),
|
||||
answers: case.question.answers.clone(),
|
||||
is_impossible: case.question.is_impossible,
|
||||
|
||||
@@ -575,7 +575,7 @@ fn validate_answers(
|
||||
if chunk_text.contains(&needle)
|
||||
|| (!needle_norm.is_empty() && chunk_norm.contains(&needle_norm))
|
||||
{
|
||||
matches.insert(chunk.chunk.get_id().to_string());
|
||||
matches.insert(chunk.chunk.id().to_string());
|
||||
found_any = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -216,7 +216,7 @@ pub(crate) async fn ensure_eval_user(db: &SurrealDbClient) -> Result<User> {
|
||||
theme: Theme::System,
|
||||
};
|
||||
|
||||
if let Some(existing) = db.get_item::<User>(user.get_id()).await? {
|
||||
if let Some(existing) = db.get_item::<User>(user.id()).await? {
|
||||
return Ok(existing);
|
||||
}
|
||||
|
||||
|
||||
@@ -247,7 +247,7 @@ pub(crate) async fn run_queries(
|
||||
|| candidate
|
||||
.chunks
|
||||
.iter()
|
||||
.any(|chunk| expected_chunk_ids_set.contains(&chunk.chunk.get_id()))
|
||||
.any(|chunk| expected_chunk_ids_set.contains(&chunk.chunk.id()))
|
||||
} else {
|
||||
true
|
||||
};
|
||||
|
||||
@@ -194,7 +194,7 @@ impl EvaluationCandidate {
|
||||
fn from_entity(entity: RetrievedEntity) -> Self {
|
||||
let entity_category = Some(format!("{:?}", entity.entity.entity_type));
|
||||
Self {
|
||||
entity_id: entity.entity.get_id().to_string(),
|
||||
entity_id: entity.entity.id().to_string(),
|
||||
source_id: entity.entity.source_id.clone(),
|
||||
entity_name: entity.entity.name.clone(),
|
||||
entity_description: Some(entity.entity.description.clone()),
|
||||
@@ -207,7 +207,7 @@ impl EvaluationCandidate {
|
||||
fn from_chunk(chunk: RetrievedChunk) -> Self {
|
||||
let snippet = chunk_snippet(&chunk.chunk.chunk);
|
||||
Self {
|
||||
entity_id: chunk.chunk.get_id().to_string(),
|
||||
entity_id: chunk.chunk.id().to_string(),
|
||||
source_id: chunk.chunk.source_id.clone(),
|
||||
entity_name: chunk.chunk.source_id.clone(),
|
||||
entity_description: Some(snippet),
|
||||
@@ -423,11 +423,11 @@ pub fn build_case_diagnostics(
|
||||
let mut chunk_entries = Vec::new();
|
||||
for chunk in &candidate.chunks {
|
||||
let contains_answer = text_contains_answer(&chunk.chunk.chunk, answers_lower);
|
||||
let expected_chunk = expected_set.contains(chunk.chunk.get_id());
|
||||
seen_chunks.insert(chunk.chunk.get_id().to_string());
|
||||
attached_chunk_ids.push(chunk.chunk.get_id().to_string());
|
||||
let expected_chunk = expected_set.contains(chunk.chunk.id());
|
||||
seen_chunks.insert(chunk.chunk.id().to_string());
|
||||
attached_chunk_ids.push(chunk.chunk.id().to_string());
|
||||
chunk_entries.push(ChunkDiagnosticsEntry {
|
||||
chunk_id: chunk.chunk.get_id().to_string(),
|
||||
chunk_id: chunk.chunk.id().to_string(),
|
||||
score: chunk.score,
|
||||
contains_answer,
|
||||
expected_chunk,
|
||||
|
||||
@@ -240,7 +240,7 @@ impl PipelineServices for DefaultPipelineServices {
|
||||
) -> Result<(Vec<EmbeddedKnowledgeEntity>, Vec<KnowledgeRelationship>), AppError> {
|
||||
analysis
|
||||
.to_database_entities(
|
||||
content.get_id(),
|
||||
content.id(),
|
||||
&content.user_id,
|
||||
&self.openai_client,
|
||||
&self.db,
|
||||
@@ -271,7 +271,7 @@ impl PipelineServices for DefaultPipelineServices {
|
||||
.await
|
||||
.map_err(|e| AppError::InternalError(format!("FastEmbed embedding for chunk failed: {e}")))?;
|
||||
let chunk_struct = TextChunk::new(
|
||||
content.get_id().to_string(),
|
||||
content.id().to_string(),
|
||||
chunk_text,
|
||||
content.user_id.clone(),
|
||||
);
|
||||
|
||||
@@ -188,7 +188,7 @@ pub fn merge_scored_by_id<T, S: std::hash::BuildHasher>(
|
||||
T: StoredObject + Clone,
|
||||
{
|
||||
for scored in incoming {
|
||||
let id = scored.item.get_id().to_owned();
|
||||
let id = scored.item.id().to_owned();
|
||||
target
|
||||
.entry(id)
|
||||
.and_modify(|existing| {
|
||||
@@ -218,7 +218,7 @@ where
|
||||
b.fused
|
||||
.partial_cmp(&a.fused)
|
||||
.unwrap_or(Ordering::Equal)
|
||||
.then_with(|| a.item.get_id().cmp(b.item.get_id()))
|
||||
.then_with(|| a.item.id().cmp(b.item.id()))
|
||||
});
|
||||
}
|
||||
|
||||
@@ -250,11 +250,11 @@ where
|
||||
b_score
|
||||
.partial_cmp(&a_score)
|
||||
.unwrap_or(Ordering::Equal)
|
||||
.then_with(|| a.item.get_id().cmp(b.item.get_id()))
|
||||
.then_with(|| a.item.id().cmp(b.item.id()))
|
||||
});
|
||||
|
||||
for (rank, candidate) in vector_ranked.into_iter().enumerate() {
|
||||
let id = candidate.item.get_id().to_owned();
|
||||
let id = candidate.item.id().to_owned();
|
||||
let entry = merged
|
||||
.entry(id.clone())
|
||||
.or_insert_with(|| Scored::new(candidate.item.clone()));
|
||||
@@ -280,11 +280,11 @@ where
|
||||
b_score
|
||||
.partial_cmp(&a_score)
|
||||
.unwrap_or(Ordering::Equal)
|
||||
.then_with(|| a.item.get_id().cmp(b.item.get_id()))
|
||||
.then_with(|| a.item.id().cmp(b.item.id()))
|
||||
});
|
||||
|
||||
for (rank, candidate) in fts_ranked.into_iter().enumerate() {
|
||||
let id = candidate.item.get_id().to_owned();
|
||||
let id = candidate.item.id().to_owned();
|
||||
let entry = merged
|
||||
.entry(id.clone())
|
||||
.or_insert_with(|| Scored::new(candidate.item.clone()));
|
||||
|
||||
Reference in New Issue
Block a user