perf: avoid small owned clones and intermediate Vec allocations

- Derive Copy on 6 small enums (MessageRole, TaskState, StorageKind, EmbeddingBackend, PdfIngestMode, KnowledgeEntityType)
- Change create_ingestion_payload files param from Vec<FileInfo> to &[FileInfo]
- Remove 5 intermediate Vec allocations (4 in embedding serialization + 1 in format_history) by using a write! loop
- Remove 7 unnecessary .clone() calls exposed by Copy derive
This commit is contained in:
Per Stark
2026-05-27 10:28:08 +02:00
parent f5f0454904
commit c60db0fb56
9 changed files with 64 additions and 60 deletions
+1 -1
View File
@@ -73,7 +73,7 @@ pub async fn ingest_data(
input.content,
input.context,
input.category,
file_infos,
&file_infos,
&user_id,
)?;
+1 -1
View File
@@ -32,7 +32,7 @@ impl StorageManager {
/// This method validates the configuration and creates the appropriate
/// storage backend with proper initialization.
pub async fn new(cfg: &AppConfig) -> object_store::Result<Self> {
let backend_kind = cfg.storage.clone();
let backend_kind = cfg.storage;
let (store, local_base) = create_storage_backend(cfg).await?;
Ok(Self {
@@ -49,7 +49,7 @@ impl IngestionPayload {
content: Option<String>,
context: String,
category: String,
files: Vec<FileInfo>,
files: &[FileInfo],
user_id: &str,
) -> Result<Vec<IngestionPayload>, AppError> {
// Initialize list
@@ -83,7 +83,7 @@ impl IngestionPayload {
for file in files {
object_list.push(IngestionPayload::File {
file_info: file,
file_info: file.clone(),
context: context.clone(),
category: category.clone(),
user_id: user_id.into(),
@@ -143,7 +143,7 @@ mod tests {
Some(url.to_string()),
context.to_string(),
category.to_string(),
files,
&files,
user_id,
)
.with_context(|| "create_ingestion_payload".to_string())?;
@@ -179,7 +179,7 @@ mod tests {
Some(text.to_string()),
context.to_string(),
category.to_string(),
files,
&files,
user_id,
)
.with_context(|| "create_ingestion_payload".to_string())?;
@@ -220,7 +220,7 @@ mod tests {
None,
context.to_string(),
category.to_string(),
files,
&files,
user_id,
)
.with_context(|| "create_ingestion_payload".to_string())?;
@@ -262,7 +262,7 @@ mod tests {
Some(url.to_string()),
context.to_string(),
category.to_string(),
files,
&files,
user_id,
)
.with_context(|| "create_ingestion_payload".to_string())?;
@@ -304,7 +304,7 @@ mod tests {
None,
context.to_string(),
category.to_string(),
files,
&files,
user_id,
);
@@ -330,7 +330,7 @@ mod tests {
Some(text.to_string()),
context.to_string(),
category.to_string(),
files,
&files,
user_id,
);
+1 -1
View File
@@ -22,7 +22,7 @@ pub const MAX_ATTEMPTS: u32 = 3;
pub const DEFAULT_LEASE_SECS: i64 = 300;
pub const DEFAULT_PRIORITY: i32 = 0;
#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
#[derive(Debug, Default, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
pub enum TaskState {
#[serde(rename = "Pending")]
#[default]
+22 -21
View File
@@ -9,6 +9,7 @@
clippy::redundant_closure_for_method_calls
)]
use std::collections::HashMap;
use std::fmt::Write;
use crate::{
error::AppError, storage::db::SurrealDbClient,
@@ -23,7 +24,7 @@ use tokio_retry::{
use tracing::{error, info};
use uuid::Uuid;
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
pub enum KnowledgeEntityType {
Idea,
Project,
@@ -402,14 +403,14 @@ impl KnowledgeEntity {
// Add all update statements to the embedding table
for (id, (embedding, user_id)) in new_embeddings {
let embedding_str = format!(
"[{}]",
embedding
.iter()
.map(|f| f.to_string())
.collect::<Vec<_>>()
.join(",")
);
let mut embedding_str = String::from("[");
for (i, f) in embedding.iter().enumerate() {
if i > 0 {
embedding_str.push(',');
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
transaction_query.push_str(&format!(
"UPSERT type::thing('knowledge_entity_embedding', '{id}') SET \
entity_id = type::thing('knowledge_entity', '{id}'), \
@@ -528,14 +529,14 @@ impl KnowledgeEntity {
let mut transaction_query = String::from("BEGIN TRANSACTION;");
for (id, (embedding, user_id)) in new_embeddings {
let embedding_str = format!(
"[{}]",
embedding
.iter()
.map(|f| f.to_string())
.collect::<Vec<_>>()
.join(",")
);
let mut embedding_str = String::from("[");
for (i, f) in embedding.iter().enumerate() {
if i > 0 {
embedding_str.push(',');
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
transaction_query.push_str(&format!(
"CREATE type::thing('knowledge_entity_embedding', '{id}') SET \
entity_id = type::thing('knowledge_entity', '{id}'), \
@@ -590,7 +591,7 @@ mod tests {
source_id.clone(),
name.clone(),
description.clone(),
entity_type.clone(),
entity_type,
metadata.clone(),
user_id.clone(),
);
@@ -682,7 +683,7 @@ mod tests {
source_id.clone(),
"Entity 1".to_string(),
"Description 1".to_string(),
entity_type.clone(),
entity_type,
None,
user_id.clone(),
);
@@ -691,7 +692,7 @@ mod tests {
source_id.clone(),
"Entity 2".to_string(),
"Description 2".to_string(),
entity_type.clone(),
entity_type,
None,
user_id.clone(),
);
@@ -701,7 +702,7 @@ mod tests {
different_source_id.clone(),
"Different Entity".to_string(),
"Different Description".to_string(),
entity_type.clone(),
entity_type,
None,
user_id.clone(),
);
+11 -8
View File
@@ -1,11 +1,11 @@
#![allow(clippy::module_name_repetitions)]
use uuid::Uuid;
use std::fmt;
use std::fmt::Write;
use crate::stored_object;
#[derive(Deserialize, Debug, Clone, Serialize, PartialEq)]
#[derive(Deserialize, Debug, Clone, Copy, Serialize, PartialEq)]
pub enum MessageRole {
User,
AI,
@@ -57,11 +57,14 @@ impl fmt::Display for Message {
// helper function to format a vector of messages
pub fn format_history(history: &[Message]) -> String {
history
.iter()
.map(|msg| format!("{msg}"))
.collect::<Vec<String>>()
.join("\n")
let mut out = String::new();
for (i, msg) in history.iter().enumerate() {
if i > 0 {
out.push('\n');
}
write!(out, "{msg}").unwrap_or_default();
}
out
}
#[cfg(test)]
@@ -79,7 +82,7 @@ mod tests {
let message = Message::new(
conversation_id.to_string(),
role.clone(),
role,
content.to_string(),
references.clone(),
);
+16 -16
View File
@@ -288,14 +288,14 @@ impl TextChunk {
let mut transaction_query = String::from("BEGIN TRANSACTION;");
for (id, (embedding, user_id, source_id)) in new_embeddings {
let embedding_str = format!(
"[{}]",
embedding
.iter()
.map(ToString::to_string)
.collect::<Vec<_>>()
.join(",")
);
let mut embedding_str = String::from("[");
for (i, f) in embedding.iter().enumerate() {
if i > 0 {
embedding_str.push(',');
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
// Use the chunk id as the embedding record id to keep a 1:1 mapping
let embedding = embedding_str;
write!(
@@ -407,14 +407,14 @@ impl TextChunk {
let mut transaction_query = String::from("BEGIN TRANSACTION;");
for (id, (embedding, user_id, source_id)) in new_embeddings {
let embedding_str = format!(
"[{}]",
embedding
.iter()
.map(ToString::to_string)
.collect::<Vec<_>>()
.join(",")
);
let mut embedding_str = String::from("[");
for (i, f) in embedding.iter().enumerate() {
if i > 0 {
embedding_str.push(',');
}
write!(embedding_str, "{f}").unwrap_or_default();
}
embedding_str.push(']');
let embedding = embedding_str;
write!(
&mut transaction_query,
+3 -3
View File
@@ -3,7 +3,7 @@ use serde::Deserialize;
use std::env;
/// Selects the embedding backend for vector generation.
#[derive(Clone, Deserialize, Debug, Default, PartialEq)]
#[derive(Clone, Copy, Deserialize, Debug, Default, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum EmbeddingBackend {
/// Use OpenAI-compatible API for embeddings.
@@ -15,7 +15,7 @@ pub enum EmbeddingBackend {
Hashed,
}
#[derive(Clone, Deserialize, Debug, PartialEq)]
#[derive(Clone, Copy, Deserialize, Debug, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum StorageKind {
Local,
@@ -33,7 +33,7 @@ fn default_s3_region() -> String {
}
/// Selects the strategy used for PDF ingestion.
#[derive(Clone, Deserialize, Debug)]
#[derive(Clone, Copy, Deserialize, Debug)]
#[serde(rename_all = "kebab-case")]
pub enum PdfIngestMode {
/// Only rely on classic text extraction (no LLM fallbacks).
+1 -1
View File
@@ -142,7 +142,7 @@ pub async fn process_ingest_form(
input.content,
input.context,
input.category,
file_infos,
&file_infos,
user.id.as_str(),
)?;