From c60db0fb56ea039136ff84ef712061fd0d54a1e4 Mon Sep 17 00:00:00 2001 From: Per Stark Date: Wed, 27 May 2026 10:28:08 +0200 Subject: [PATCH] perf: avoid small own clones and intermediate Vec allocations - Derive Copy on 6 small enums (MessageRole, TaskState, StorageKind, EmbeddingBackend, PdfIngestMode, KnowledgeEntityType) - Change create_ingestion_payload files param from Vec to &[FileInfo] - Remove 5 intermediate Vec allocations (4 embedding serialization + 1 format_history) using write! loop - Remove 7 unnecessary .clone() calls exposed by Copy derive --- api-router/src/routes/ingest.rs | 2 +- common/src/storage/store.rs | 2 +- common/src/storage/types/ingestion_payload.rs | 16 +++---- common/src/storage/types/ingestion_task.rs | 2 +- common/src/storage/types/knowledge_entity.rs | 43 ++++++++++--------- common/src/storage/types/message.rs | 19 ++++---- common/src/storage/types/text_chunk.rs | 32 +++++++------- common/src/utils/config.rs | 6 +-- html-router/src/routes/ingestion/handlers.rs | 2 +- 9 files changed, 64 insertions(+), 60 deletions(-) diff --git a/api-router/src/routes/ingest.rs b/api-router/src/routes/ingest.rs index 3938952..2542e33 100644 --- a/api-router/src/routes/ingest.rs +++ b/api-router/src/routes/ingest.rs @@ -73,7 +73,7 @@ pub async fn ingest_data( input.content, input.context, input.category, - file_infos, + &file_infos, &user_id, )?; diff --git a/common/src/storage/store.rs b/common/src/storage/store.rs index 02b463d..37c0392 100644 --- a/common/src/storage/store.rs +++ b/common/src/storage/store.rs @@ -32,7 +32,7 @@ impl StorageManager { /// This method validates the configuration and creates the appropriate /// storage backend with proper initialization. pub async fn new(cfg: &AppConfig) -> object_store::Result { - let backend_kind = cfg.storage.clone(); + let backend_kind = cfg.storage; let (store, local_base) = create_storage_backend(cfg).await?; Ok(Self { diff --git a/common/src/storage/types/ingestion_payload.rs b/common/src/storage/types/ingestion_payload.rs index 2adc942..640c841 100644 --- a/common/src/storage/types/ingestion_payload.rs +++ b/common/src/storage/types/ingestion_payload.rs @@ -49,7 +49,7 @@ impl IngestionPayload { content: Option, context: String, category: String, - files: Vec, + files: &[FileInfo], user_id: &str, ) -> Result, AppError> { // Initialize list @@ -83,7 +83,7 @@ impl IngestionPayload { for file in files { object_list.push(IngestionPayload::File { - file_info: file, + file_info: file.clone(), context: context.clone(), category: category.clone(), user_id: user_id.into(), @@ -143,7 +143,7 @@ mod tests { Some(url.to_string()), context.to_string(), category.to_string(), - files, + &files, user_id, ) .with_context(|| "create_ingestion_payload".to_string())?; @@ -179,7 +179,7 @@ mod tests { Some(text.to_string()), context.to_string(), category.to_string(), - files, + &files, user_id, ) .with_context(|| "create_ingestion_payload".to_string())?; @@ -220,7 +220,7 @@ mod tests { None, context.to_string(), category.to_string(), - files, + &files, user_id, ) .with_context(|| "create_ingestion_payload".to_string())?; @@ -262,7 +262,7 @@ mod tests { Some(url.to_string()), context.to_string(), category.to_string(), - files, + &files, user_id, ) .with_context(|| "create_ingestion_payload".to_string())?; @@ -304,7 +304,7 @@ mod tests { None, context.to_string(), category.to_string(), - files, + &files, user_id, ); @@ -330,7 +330,7 @@ mod tests { Some(text.to_string()), context.to_string(), category.to_string(), - files, + &files, user_id, ); diff --git a/common/src/storage/types/ingestion_task.rs b/common/src/storage/types/ingestion_task.rs index faaadcc..e17305b 100644 --- a/common/src/storage/types/ingestion_task.rs +++ b/common/src/storage/types/ingestion_task.rs @@ -22,7 +22,7 @@ pub const MAX_ATTEMPTS: u32 = 3; pub const DEFAULT_LEASE_SECS: i64 = 300; pub const DEFAULT_PRIORITY: i32 = 0; -#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)] pub enum TaskState { #[serde(rename = "Pending")] #[default] diff --git a/common/src/storage/types/knowledge_entity.rs b/common/src/storage/types/knowledge_entity.rs index a1307b4..5d5e3cc 100644 --- a/common/src/storage/types/knowledge_entity.rs +++ b/common/src/storage/types/knowledge_entity.rs @@ -9,6 +9,7 @@ clippy::redundant_closure_for_method_calls )] use std::collections::HashMap; +use std::fmt::Write; use crate::{ error::AppError, storage::db::SurrealDbClient, @@ -23,7 +24,7 @@ use tokio_retry::{ use tracing::{error, info}; use uuid::Uuid; -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] pub enum KnowledgeEntityType { Idea, Project, @@ -402,14 +403,14 @@ impl KnowledgeEntity { // Add all update statements to the embedding table for (id, (embedding, user_id)) in new_embeddings { - let embedding_str = format!( - "[{}]", - embedding - .iter() - .map(|f| f.to_string()) - .collect::>() - .join(",") - ); + let mut embedding_str = String::from("["); + for (i, f) in embedding.iter().enumerate() { + if i > 0 { + embedding_str.push(','); + } + write!(embedding_str, "{f}").unwrap_or_default(); + } + embedding_str.push(']'); transaction_query.push_str(&format!( "UPSERT type::thing('knowledge_entity_embedding', '{id}') SET \ entity_id = type::thing('knowledge_entity', '{id}'), \ @@ -528,14 +529,14 @@ impl KnowledgeEntity { let mut transaction_query = String::from("BEGIN TRANSACTION;"); for (id, (embedding, user_id)) in new_embeddings { - let embedding_str = format!( - "[{}]", - embedding - .iter() - .map(|f| f.to_string()) - .collect::>() - .join(",") - ); + let mut embedding_str = String::from("["); + for (i, f) in embedding.iter().enumerate() { + if i > 0 { + embedding_str.push(','); + } + write!(embedding_str, "{f}").unwrap_or_default(); + } + embedding_str.push(']'); transaction_query.push_str(&format!( "CREATE type::thing('knowledge_entity_embedding', '{id}') SET \ entity_id = type::thing('knowledge_entity', '{id}'), \ @@ -590,7 +591,7 @@ mod tests { source_id.clone(), name.clone(), description.clone(), - entity_type.clone(), + entity_type, metadata.clone(), user_id.clone(), ); @@ -682,7 +683,7 @@ mod tests { source_id.clone(), "Entity 1".to_string(), "Description 1".to_string(), - entity_type.clone(), + entity_type, None, user_id.clone(), ); @@ -691,7 +692,7 @@ mod tests { source_id.clone(), "Entity 2".to_string(), "Description 2".to_string(), - entity_type.clone(), + entity_type, None, user_id.clone(), ); @@ -701,7 +702,7 @@ mod tests { different_source_id.clone(), "Different Entity".to_string(), "Different Description".to_string(), - entity_type.clone(), + entity_type, None, user_id.clone(), ); diff --git a/common/src/storage/types/message.rs b/common/src/storage/types/message.rs index da0a612..4f2f7f8 100644 --- a/common/src/storage/types/message.rs +++ b/common/src/storage/types/message.rs @@ -1,11 +1,11 @@ #![allow(clippy::module_name_repetitions)] use uuid::Uuid; -use std::fmt; +use std::fmt::Write; use crate::stored_object; -#[derive(Deserialize, Debug, Clone, Serialize, PartialEq)] +#[derive(Deserialize, Debug, Clone, Copy, Serialize, PartialEq)] pub enum MessageRole { User, AI, @@ -57,11 +57,14 @@ impl fmt::Display for Message { // helper function to format a vector of messages pub fn format_history(history: &[Message]) -> String { - history - .iter() - .map(|msg| format!("{msg}")) - .collect::>() - .join("\n") + let mut out = String::new(); + for (i, msg) in history.iter().enumerate() { + if i > 0 { + out.push('\n'); + } + write!(out, "{msg}").unwrap_or_default(); + } + out } #[cfg(test)] @@ -79,7 +82,7 @@ mod tests { let message = Message::new( conversation_id.to_string(), - role.clone(), + role, content.to_string(), references.clone(), ); diff --git a/common/src/storage/types/text_chunk.rs b/common/src/storage/types/text_chunk.rs index c336813..29a6665 100644 --- a/common/src/storage/types/text_chunk.rs +++ b/common/src/storage/types/text_chunk.rs @@ -288,14 +288,14 @@ impl TextChunk { let mut transaction_query = String::from("BEGIN TRANSACTION;"); for (id, (embedding, user_id, source_id)) in new_embeddings { - let embedding_str = format!( - "[{}]", - embedding - .iter() - .map(ToString::to_string) - .collect::>() - .join(",") - ); + let mut embedding_str = String::from("["); + for (i, f) in embedding.iter().enumerate() { + if i > 0 { + embedding_str.push(','); + } + write!(embedding_str, "{f}").unwrap_or_default(); + } + embedding_str.push(']'); // Use the chunk id as the embedding record id to keep a 1:1 mapping let embedding = embedding_str; write!( @@ -407,14 +407,14 @@ impl TextChunk { let mut transaction_query = String::from("BEGIN TRANSACTION;"); for (id, (embedding, user_id, source_id)) in new_embeddings { - let embedding_str = format!( - "[{}]", - embedding - .iter() - .map(ToString::to_string) - .collect::>() - .join(",") - ); + let mut embedding_str = String::from("["); + for (i, f) in embedding.iter().enumerate() { + if i > 0 { + embedding_str.push(','); + } + write!(embedding_str, "{f}").unwrap_or_default(); + } + embedding_str.push(']'); let embedding = embedding_str; write!( &mut transaction_query, diff --git a/common/src/utils/config.rs b/common/src/utils/config.rs index 95aa13d..04d0eb2 100644 --- a/common/src/utils/config.rs +++ b/common/src/utils/config.rs @@ -3,7 +3,7 @@ use serde::Deserialize; use std::env; /// Selects the embedding backend for vector generation. -#[derive(Clone, Deserialize, Debug, Default, PartialEq)] +#[derive(Clone, Copy, Deserialize, Debug, Default, PartialEq)] #[serde(rename_all = "lowercase")] pub enum EmbeddingBackend { /// Use OpenAI-compatible API for embeddings. @@ -15,7 +15,7 @@ pub enum EmbeddingBackend { Hashed, } -#[derive(Clone, Deserialize, Debug, PartialEq)] +#[derive(Clone, Copy, Deserialize, Debug, PartialEq)] #[serde(rename_all = "lowercase")] pub enum StorageKind { Local, @@ -33,7 +33,7 @@ fn default_s3_region() -> String { } /// Selects the strategy used for PDF ingestion. -#[derive(Clone, Deserialize, Debug)] +#[derive(Clone, Copy, Deserialize, Debug)] #[serde(rename_all = "kebab-case")] pub enum PdfIngestMode { /// Only rely on classic text extraction (no LLM fallbacks). diff --git a/html-router/src/routes/ingestion/handlers.rs b/html-router/src/routes/ingestion/handlers.rs index d79c18d..51e4cea 100644 --- a/html-router/src/routes/ingestion/handlers.rs +++ b/html-router/src/routes/ingestion/handlers.rs @@ -142,7 +142,7 @@ pub async fn process_ingest_form( input.content, input.context, input.category, - file_infos, + &file_infos, user.id.as_str(), )?;