mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-29 19:00:51 +02:00
chore: rename get_id to id, add doc comments, pre-allocate format_history
This commit is contained in:
@@ -143,7 +143,7 @@ impl SurrealDbClient {
|
|||||||
T: StoredObject + Send + Sync + 'static,
|
T: StoredObject + Send + Sync + 'static,
|
||||||
{
|
{
|
||||||
self.client
|
self.client
|
||||||
.create((T::table_name(), item.get_id()))
|
.create((T::table_name(), item.id()))
|
||||||
.content(item)
|
.content(item)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
@@ -159,7 +159,7 @@ impl SurrealDbClient {
|
|||||||
where
|
where
|
||||||
T: StoredObject + Send + Sync + 'static,
|
T: StoredObject + Send + Sync + 'static,
|
||||||
{
|
{
|
||||||
let id = item.get_id().to_string();
|
let id = item.id().to_string();
|
||||||
self.client
|
self.client
|
||||||
.upsert((T::table_name(), id))
|
.upsert((T::table_name(), id))
|
||||||
.content(item)
|
.content(item)
|
||||||
|
|||||||
@@ -581,14 +581,22 @@ async fn poll_index_build_status(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Snapshot of an index build progress as reported by SurrealDB's `INFO FOR INDEX`.
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
struct IndexBuildSnapshot {
|
struct IndexBuildSnapshot {
|
||||||
|
/// Current build status string (e.g., `"indexing"`, `"ready"`, `"error"`).
|
||||||
status: String,
|
status: String,
|
||||||
|
/// Number of rows present when the build started.
|
||||||
initial: u64,
|
initial: u64,
|
||||||
|
/// Number of rows still pending processing.
|
||||||
pending: u64,
|
pending: u64,
|
||||||
|
/// Number of rows updated since the build started.
|
||||||
updated: u64,
|
updated: u64,
|
||||||
|
/// Total rows processed so far (`initial + updated`).
|
||||||
processed: u64,
|
processed: u64,
|
||||||
|
/// Total rows expected (from `SELECT count()` before the build), if available.
|
||||||
total_rows: Option<u64>,
|
total_rows: Option<u64>,
|
||||||
|
/// Progress as a percentage of `processed / total_rows`, if `total_rows` is known.
|
||||||
progress_pct: Option<f64>,
|
progress_pct: Option<f64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ impl StoredObject for Analytics {
|
|||||||
"analytics"
|
"analytics"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_id(&self) -> &str {
|
fn id(&self) -> &str {
|
||||||
&self.id
|
&self.id
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,26 +19,34 @@ use crate::{
|
|||||||
stored_object,
|
stored_object,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Errors that can occur during file storage operations.
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
pub enum FileError {
|
pub enum FileError {
|
||||||
|
/// No file record found with the given UUID.
|
||||||
#[error("file not found for uuid: {0}")]
|
#[error("file not found for uuid: {0}")]
|
||||||
FileNotFound(String),
|
FileNotFound(String),
|
||||||
|
|
||||||
|
/// Underlying I/O operation failed.
|
||||||
#[error("io error occurred: {0}")]
|
#[error("io error occurred: {0}")]
|
||||||
Io(#[from] std::io::Error),
|
Io(#[from] std::io::Error),
|
||||||
|
|
||||||
|
/// A file with the same SHA-256 hash already exists.
|
||||||
#[error("duplicate file detected with sha256: {0}")]
|
#[error("duplicate file detected with sha256: {0}")]
|
||||||
DuplicateFile(String),
|
DuplicateFile(String),
|
||||||
|
|
||||||
|
/// Database operation on the file record failed.
|
||||||
#[error("surrealdb error: {0}")]
|
#[error("surrealdb error: {0}")]
|
||||||
SurrealError(#[from] surrealdb::Error),
|
SurrealError(#[from] surrealdb::Error),
|
||||||
|
|
||||||
|
/// Failed to persist the temporary file to its final location.
|
||||||
#[error("failed to persist file: {0}")]
|
#[error("failed to persist file: {0}")]
|
||||||
PersistError(#[from] tempfile::PersistError),
|
PersistError(#[from] tempfile::PersistError),
|
||||||
|
|
||||||
|
/// Upload metadata did not include a file name.
|
||||||
#[error("file name missing in metadata")]
|
#[error("file name missing in metadata")]
|
||||||
MissingFileName,
|
MissingFileName,
|
||||||
|
|
||||||
|
/// The underlying object store operation failed.
|
||||||
#[error("object store error: {0}")]
|
#[error("object store error: {0}")]
|
||||||
ObjectStore(#[from] ObjectStoreError),
|
ObjectStore(#[from] ObjectStoreError),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -68,12 +68,16 @@ impl TaskState {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Information about an error that occurred during task processing.
|
||||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq, Default)]
|
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq, Default)]
|
||||||
pub struct TaskErrorInfo {
|
pub struct TaskErrorInfo {
|
||||||
|
/// Machine-readable error code (e.g., `"pipeline_error"`).
|
||||||
pub code: Option<String>,
|
pub code: Option<String>,
|
||||||
|
/// Human-readable error description.
|
||||||
pub message: String,
|
pub message: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Internal events that drive the task state machine transitions.
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
enum TaskTransition {
|
enum TaskTransition {
|
||||||
StartProcessing,
|
StartProcessing,
|
||||||
|
|||||||
@@ -60,7 +60,11 @@ impl fmt::Display for Message {
|
|||||||
// helper function to format a vector of messages
|
// helper function to format a vector of messages
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn format_history(history: &[Message]) -> String {
|
pub fn format_history(history: &[Message]) -> String {
|
||||||
let mut out = String::new();
|
let estimated: usize = history
|
||||||
|
.iter()
|
||||||
|
.map(|m| m.content.len() + 10)
|
||||||
|
.sum();
|
||||||
|
let mut out = String::with_capacity(estimated);
|
||||||
for (i, msg) in history.iter().enumerate() {
|
for (i, msg) in history.iter().enumerate() {
|
||||||
if i > 0 {
|
if i > 0 {
|
||||||
out.push('\n');
|
out.push('\n');
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ pub mod user;
|
|||||||
|
|
||||||
pub trait StoredObject: Serialize + for<'de> Deserialize<'de> {
|
pub trait StoredObject: Serialize + for<'de> Deserialize<'de> {
|
||||||
fn table_name() -> &'static str;
|
fn table_name() -> &'static str;
|
||||||
fn get_id(&self) -> &str;
|
fn id(&self) -> &str;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
@@ -52,7 +52,7 @@ macro_rules! stored_object {
|
|||||||
$table
|
$table
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_id(&self) -> &str {
|
fn id(&self) -> &str {
|
||||||
&self.id
|
&self.id
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ impl StoredObject for SystemSettings {
|
|||||||
"system_settings"
|
"system_settings"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_id(&self) -> &str {
|
fn id(&self) -> &str {
|
||||||
&self.id
|
&self.id
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,11 @@
|
|||||||
use super::config::AppConfig;
|
use super::config::AppConfig;
|
||||||
|
|
||||||
|
/// Errors raised when validating ingestion payloads against configured limits.
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub enum IngestValidationError {
|
pub enum IngestValidationError {
|
||||||
|
/// The payload exceeds a configured size limit (content, context, or category).
|
||||||
PayloadTooLarge(String),
|
PayloadTooLarge(String),
|
||||||
|
/// The request violates a non-size constraint (e.g., too many files).
|
||||||
BadRequest(String),
|
BadRequest(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -340,7 +340,7 @@ pub async fn ensure_corpus(
|
|||||||
corpus_questions.push(CorpusQuestion {
|
corpus_questions.push(CorpusQuestion {
|
||||||
question_id: case.question.id.clone(),
|
question_id: case.question.id.clone(),
|
||||||
paragraph_id: case.paragraph.id.clone(),
|
paragraph_id: case.paragraph.id.clone(),
|
||||||
text_content_id: record.shard.text_content.get_id().to_string(),
|
text_content_id: record.shard.text_content.id().to_string(),
|
||||||
question_text: case.question.question.clone(),
|
question_text: case.question.question.clone(),
|
||||||
answers: case.question.answers.clone(),
|
answers: case.question.answers.clone(),
|
||||||
is_impossible: case.question.is_impossible,
|
is_impossible: case.question.is_impossible,
|
||||||
|
|||||||
@@ -575,7 +575,7 @@ fn validate_answers(
|
|||||||
if chunk_text.contains(&needle)
|
if chunk_text.contains(&needle)
|
||||||
|| (!needle_norm.is_empty() && chunk_norm.contains(&needle_norm))
|
|| (!needle_norm.is_empty() && chunk_norm.contains(&needle_norm))
|
||||||
{
|
{
|
||||||
matches.insert(chunk.chunk.get_id().to_string());
|
matches.insert(chunk.chunk.id().to_string());
|
||||||
found_any = true;
|
found_any = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -216,7 +216,7 @@ pub(crate) async fn ensure_eval_user(db: &SurrealDbClient) -> Result<User> {
|
|||||||
theme: Theme::System,
|
theme: Theme::System,
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(existing) = db.get_item::<User>(user.get_id()).await? {
|
if let Some(existing) = db.get_item::<User>(user.id()).await? {
|
||||||
return Ok(existing);
|
return Ok(existing);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -247,7 +247,7 @@ pub(crate) async fn run_queries(
|
|||||||
|| candidate
|
|| candidate
|
||||||
.chunks
|
.chunks
|
||||||
.iter()
|
.iter()
|
||||||
.any(|chunk| expected_chunk_ids_set.contains(&chunk.chunk.get_id()))
|
.any(|chunk| expected_chunk_ids_set.contains(&chunk.chunk.id()))
|
||||||
} else {
|
} else {
|
||||||
true
|
true
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -194,7 +194,7 @@ impl EvaluationCandidate {
|
|||||||
fn from_entity(entity: RetrievedEntity) -> Self {
|
fn from_entity(entity: RetrievedEntity) -> Self {
|
||||||
let entity_category = Some(format!("{:?}", entity.entity.entity_type));
|
let entity_category = Some(format!("{:?}", entity.entity.entity_type));
|
||||||
Self {
|
Self {
|
||||||
entity_id: entity.entity.get_id().to_string(),
|
entity_id: entity.entity.id().to_string(),
|
||||||
source_id: entity.entity.source_id.clone(),
|
source_id: entity.entity.source_id.clone(),
|
||||||
entity_name: entity.entity.name.clone(),
|
entity_name: entity.entity.name.clone(),
|
||||||
entity_description: Some(entity.entity.description.clone()),
|
entity_description: Some(entity.entity.description.clone()),
|
||||||
@@ -207,7 +207,7 @@ impl EvaluationCandidate {
|
|||||||
fn from_chunk(chunk: RetrievedChunk) -> Self {
|
fn from_chunk(chunk: RetrievedChunk) -> Self {
|
||||||
let snippet = chunk_snippet(&chunk.chunk.chunk);
|
let snippet = chunk_snippet(&chunk.chunk.chunk);
|
||||||
Self {
|
Self {
|
||||||
entity_id: chunk.chunk.get_id().to_string(),
|
entity_id: chunk.chunk.id().to_string(),
|
||||||
source_id: chunk.chunk.source_id.clone(),
|
source_id: chunk.chunk.source_id.clone(),
|
||||||
entity_name: chunk.chunk.source_id.clone(),
|
entity_name: chunk.chunk.source_id.clone(),
|
||||||
entity_description: Some(snippet),
|
entity_description: Some(snippet),
|
||||||
@@ -423,11 +423,11 @@ pub fn build_case_diagnostics(
|
|||||||
let mut chunk_entries = Vec::new();
|
let mut chunk_entries = Vec::new();
|
||||||
for chunk in &candidate.chunks {
|
for chunk in &candidate.chunks {
|
||||||
let contains_answer = text_contains_answer(&chunk.chunk.chunk, answers_lower);
|
let contains_answer = text_contains_answer(&chunk.chunk.chunk, answers_lower);
|
||||||
let expected_chunk = expected_set.contains(chunk.chunk.get_id());
|
let expected_chunk = expected_set.contains(chunk.chunk.id());
|
||||||
seen_chunks.insert(chunk.chunk.get_id().to_string());
|
seen_chunks.insert(chunk.chunk.id().to_string());
|
||||||
attached_chunk_ids.push(chunk.chunk.get_id().to_string());
|
attached_chunk_ids.push(chunk.chunk.id().to_string());
|
||||||
chunk_entries.push(ChunkDiagnosticsEntry {
|
chunk_entries.push(ChunkDiagnosticsEntry {
|
||||||
chunk_id: chunk.chunk.get_id().to_string(),
|
chunk_id: chunk.chunk.id().to_string(),
|
||||||
score: chunk.score,
|
score: chunk.score,
|
||||||
contains_answer,
|
contains_answer,
|
||||||
expected_chunk,
|
expected_chunk,
|
||||||
|
|||||||
@@ -240,7 +240,7 @@ impl PipelineServices for DefaultPipelineServices {
|
|||||||
) -> Result<(Vec<EmbeddedKnowledgeEntity>, Vec<KnowledgeRelationship>), AppError> {
|
) -> Result<(Vec<EmbeddedKnowledgeEntity>, Vec<KnowledgeRelationship>), AppError> {
|
||||||
analysis
|
analysis
|
||||||
.to_database_entities(
|
.to_database_entities(
|
||||||
content.get_id(),
|
content.id(),
|
||||||
&content.user_id,
|
&content.user_id,
|
||||||
&self.openai_client,
|
&self.openai_client,
|
||||||
&self.db,
|
&self.db,
|
||||||
@@ -271,7 +271,7 @@ impl PipelineServices for DefaultPipelineServices {
|
|||||||
.await
|
.await
|
||||||
.map_err(|e| AppError::InternalError(format!("FastEmbed embedding for chunk failed: {e}")))?;
|
.map_err(|e| AppError::InternalError(format!("FastEmbed embedding for chunk failed: {e}")))?;
|
||||||
let chunk_struct = TextChunk::new(
|
let chunk_struct = TextChunk::new(
|
||||||
content.get_id().to_string(),
|
content.id().to_string(),
|
||||||
chunk_text,
|
chunk_text,
|
||||||
content.user_id.clone(),
|
content.user_id.clone(),
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -188,7 +188,7 @@ pub fn merge_scored_by_id<T, S: std::hash::BuildHasher>(
|
|||||||
T: StoredObject + Clone,
|
T: StoredObject + Clone,
|
||||||
{
|
{
|
||||||
for scored in incoming {
|
for scored in incoming {
|
||||||
let id = scored.item.get_id().to_owned();
|
let id = scored.item.id().to_owned();
|
||||||
target
|
target
|
||||||
.entry(id)
|
.entry(id)
|
||||||
.and_modify(|existing| {
|
.and_modify(|existing| {
|
||||||
@@ -218,7 +218,7 @@ where
|
|||||||
b.fused
|
b.fused
|
||||||
.partial_cmp(&a.fused)
|
.partial_cmp(&a.fused)
|
||||||
.unwrap_or(Ordering::Equal)
|
.unwrap_or(Ordering::Equal)
|
||||||
.then_with(|| a.item.get_id().cmp(b.item.get_id()))
|
.then_with(|| a.item.id().cmp(b.item.id()))
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -250,11 +250,11 @@ where
|
|||||||
b_score
|
b_score
|
||||||
.partial_cmp(&a_score)
|
.partial_cmp(&a_score)
|
||||||
.unwrap_or(Ordering::Equal)
|
.unwrap_or(Ordering::Equal)
|
||||||
.then_with(|| a.item.get_id().cmp(b.item.get_id()))
|
.then_with(|| a.item.id().cmp(b.item.id()))
|
||||||
});
|
});
|
||||||
|
|
||||||
for (rank, candidate) in vector_ranked.into_iter().enumerate() {
|
for (rank, candidate) in vector_ranked.into_iter().enumerate() {
|
||||||
let id = candidate.item.get_id().to_owned();
|
let id = candidate.item.id().to_owned();
|
||||||
let entry = merged
|
let entry = merged
|
||||||
.entry(id.clone())
|
.entry(id.clone())
|
||||||
.or_insert_with(|| Scored::new(candidate.item.clone()));
|
.or_insert_with(|| Scored::new(candidate.item.clone()));
|
||||||
@@ -280,11 +280,11 @@ where
|
|||||||
b_score
|
b_score
|
||||||
.partial_cmp(&a_score)
|
.partial_cmp(&a_score)
|
||||||
.unwrap_or(Ordering::Equal)
|
.unwrap_or(Ordering::Equal)
|
||||||
.then_with(|| a.item.get_id().cmp(b.item.get_id()))
|
.then_with(|| a.item.id().cmp(b.item.id()))
|
||||||
});
|
});
|
||||||
|
|
||||||
for (rank, candidate) in fts_ranked.into_iter().enumerate() {
|
for (rank, candidate) in fts_ranked.into_iter().enumerate() {
|
||||||
let id = candidate.item.get_id().to_owned();
|
let id = candidate.item.id().to_owned();
|
||||||
let entry = merged
|
let entry = merged
|
||||||
.entry(id.clone())
|
.entry(id.clone())
|
||||||
.or_insert_with(|| Scored::new(candidate.item.clone()));
|
.or_insert_with(|| Scored::new(candidate.item.clone()));
|
||||||
|
|||||||
Reference in New Issue
Block a user