mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-28 10:29:30 +02:00
perf: avoid small own clones and intermediate Vec allocations
- Derive Copy on 6 small enums (MessageRole, TaskState, StorageKind, EmbeddingBackend, PdfIngestMode, KnowledgeEntityType)
- Change create_ingestion_payload files param from Vec<FileInfo> to &[FileInfo]
- Remove 5 intermediate Vec allocations (4 embedding serialization + 1 format_history) using write! loop
- Remove 7 unnecessary .clone() calls exposed by Copy derive
This commit is contained in:
@@ -32,7 +32,7 @@ impl StorageManager {
|
||||
/// This method validates the configuration and creates the appropriate
|
||||
/// storage backend with proper initialization.
|
||||
pub async fn new(cfg: &AppConfig) -> object_store::Result<Self> {
|
||||
let backend_kind = cfg.storage.clone();
|
||||
let backend_kind = cfg.storage;
|
||||
let (store, local_base) = create_storage_backend(cfg).await?;
|
||||
|
||||
Ok(Self {
|
||||
|
||||
@@ -49,7 +49,7 @@ impl IngestionPayload {
|
||||
content: Option<String>,
|
||||
context: String,
|
||||
category: String,
|
||||
files: Vec<FileInfo>,
|
||||
files: &[FileInfo],
|
||||
user_id: &str,
|
||||
) -> Result<Vec<IngestionPayload>, AppError> {
|
||||
// Initialize list
|
||||
@@ -83,7 +83,7 @@ impl IngestionPayload {
|
||||
|
||||
for file in files {
|
||||
object_list.push(IngestionPayload::File {
|
||||
file_info: file,
|
||||
file_info: file.clone(),
|
||||
context: context.clone(),
|
||||
category: category.clone(),
|
||||
user_id: user_id.into(),
|
||||
@@ -143,7 +143,7 @@ mod tests {
|
||||
Some(url.to_string()),
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
&files,
|
||||
user_id,
|
||||
)
|
||||
.with_context(|| "create_ingestion_payload".to_string())?;
|
||||
@@ -179,7 +179,7 @@ mod tests {
|
||||
Some(text.to_string()),
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
&files,
|
||||
user_id,
|
||||
)
|
||||
.with_context(|| "create_ingestion_payload".to_string())?;
|
||||
@@ -220,7 +220,7 @@ mod tests {
|
||||
None,
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
&files,
|
||||
user_id,
|
||||
)
|
||||
.with_context(|| "create_ingestion_payload".to_string())?;
|
||||
@@ -262,7 +262,7 @@ mod tests {
|
||||
Some(url.to_string()),
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
&files,
|
||||
user_id,
|
||||
)
|
||||
.with_context(|| "create_ingestion_payload".to_string())?;
|
||||
@@ -304,7 +304,7 @@ mod tests {
|
||||
None,
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
&files,
|
||||
user_id,
|
||||
);
|
||||
|
||||
@@ -330,7 +330,7 @@ mod tests {
|
||||
Some(text.to_string()),
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
&files,
|
||||
user_id,
|
||||
);
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ pub const MAX_ATTEMPTS: u32 = 3;
|
||||
pub const DEFAULT_LEASE_SECS: i64 = 300;
|
||||
pub const DEFAULT_PRIORITY: i32 = 0;
|
||||
|
||||
#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
|
||||
#[derive(Debug, Default, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
|
||||
pub enum TaskState {
|
||||
#[serde(rename = "Pending")]
|
||||
#[default]
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
clippy::redundant_closure_for_method_calls
|
||||
)]
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Write;
|
||||
|
||||
use crate::{
|
||||
error::AppError, storage::db::SurrealDbClient,
|
||||
@@ -23,7 +24,7 @@ use tokio_retry::{
|
||||
use tracing::{error, info};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
|
||||
pub enum KnowledgeEntityType {
|
||||
Idea,
|
||||
Project,
|
||||
@@ -402,14 +403,14 @@ impl KnowledgeEntity {
|
||||
|
||||
// Add all update statements to the embedding table
|
||||
for (id, (embedding, user_id)) in new_embeddings {
|
||||
let embedding_str = format!(
|
||||
"[{}]",
|
||||
embedding
|
||||
.iter()
|
||||
.map(|f| f.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
);
|
||||
let mut embedding_str = String::from("[");
|
||||
for (i, f) in embedding.iter().enumerate() {
|
||||
if i > 0 {
|
||||
embedding_str.push(',');
|
||||
}
|
||||
write!(embedding_str, "{f}").unwrap_or_default();
|
||||
}
|
||||
embedding_str.push(']');
|
||||
transaction_query.push_str(&format!(
|
||||
"UPSERT type::thing('knowledge_entity_embedding', '{id}') SET \
|
||||
entity_id = type::thing('knowledge_entity', '{id}'), \
|
||||
@@ -528,14 +529,14 @@ impl KnowledgeEntity {
|
||||
let mut transaction_query = String::from("BEGIN TRANSACTION;");
|
||||
|
||||
for (id, (embedding, user_id)) in new_embeddings {
|
||||
let embedding_str = format!(
|
||||
"[{}]",
|
||||
embedding
|
||||
.iter()
|
||||
.map(|f| f.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
);
|
||||
let mut embedding_str = String::from("[");
|
||||
for (i, f) in embedding.iter().enumerate() {
|
||||
if i > 0 {
|
||||
embedding_str.push(',');
|
||||
}
|
||||
write!(embedding_str, "{f}").unwrap_or_default();
|
||||
}
|
||||
embedding_str.push(']');
|
||||
transaction_query.push_str(&format!(
|
||||
"CREATE type::thing('knowledge_entity_embedding', '{id}') SET \
|
||||
entity_id = type::thing('knowledge_entity', '{id}'), \
|
||||
@@ -590,7 +591,7 @@ mod tests {
|
||||
source_id.clone(),
|
||||
name.clone(),
|
||||
description.clone(),
|
||||
entity_type.clone(),
|
||||
entity_type,
|
||||
metadata.clone(),
|
||||
user_id.clone(),
|
||||
);
|
||||
@@ -682,7 +683,7 @@ mod tests {
|
||||
source_id.clone(),
|
||||
"Entity 1".to_string(),
|
||||
"Description 1".to_string(),
|
||||
entity_type.clone(),
|
||||
entity_type,
|
||||
None,
|
||||
user_id.clone(),
|
||||
);
|
||||
@@ -691,7 +692,7 @@ mod tests {
|
||||
source_id.clone(),
|
||||
"Entity 2".to_string(),
|
||||
"Description 2".to_string(),
|
||||
entity_type.clone(),
|
||||
entity_type,
|
||||
None,
|
||||
user_id.clone(),
|
||||
);
|
||||
@@ -701,7 +702,7 @@ mod tests {
|
||||
different_source_id.clone(),
|
||||
"Different Entity".to_string(),
|
||||
"Different Description".to_string(),
|
||||
entity_type.clone(),
|
||||
entity_type,
|
||||
None,
|
||||
user_id.clone(),
|
||||
);
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
#![allow(clippy::module_name_repetitions)]
|
||||
use uuid::Uuid;
|
||||
|
||||
use std::fmt;
|
||||
use std::fmt::Write;
|
||||
|
||||
use crate::stored_object;
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, Serialize, PartialEq)]
|
||||
#[derive(Deserialize, Debug, Clone, Copy, Serialize, PartialEq)]
|
||||
pub enum MessageRole {
|
||||
User,
|
||||
AI,
|
||||
@@ -57,11 +57,14 @@ impl fmt::Display for Message {
|
||||
|
||||
// helper function to format a vector of messages
|
||||
pub fn format_history(history: &[Message]) -> String {
|
||||
history
|
||||
.iter()
|
||||
.map(|msg| format!("{msg}"))
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n")
|
||||
let mut out = String::new();
|
||||
for (i, msg) in history.iter().enumerate() {
|
||||
if i > 0 {
|
||||
out.push('\n');
|
||||
}
|
||||
write!(out, "{msg}").unwrap_or_default();
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -79,7 +82,7 @@ mod tests {
|
||||
|
||||
let message = Message::new(
|
||||
conversation_id.to_string(),
|
||||
role.clone(),
|
||||
role,
|
||||
content.to_string(),
|
||||
references.clone(),
|
||||
);
|
||||
|
||||
@@ -288,14 +288,14 @@ impl TextChunk {
|
||||
let mut transaction_query = String::from("BEGIN TRANSACTION;");
|
||||
|
||||
for (id, (embedding, user_id, source_id)) in new_embeddings {
|
||||
let embedding_str = format!(
|
||||
"[{}]",
|
||||
embedding
|
||||
.iter()
|
||||
.map(ToString::to_string)
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
);
|
||||
let mut embedding_str = String::from("[");
|
||||
for (i, f) in embedding.iter().enumerate() {
|
||||
if i > 0 {
|
||||
embedding_str.push(',');
|
||||
}
|
||||
write!(embedding_str, "{f}").unwrap_or_default();
|
||||
}
|
||||
embedding_str.push(']');
|
||||
// Use the chunk id as the embedding record id to keep a 1:1 mapping
|
||||
let embedding = embedding_str;
|
||||
write!(
|
||||
@@ -407,14 +407,14 @@ impl TextChunk {
|
||||
let mut transaction_query = String::from("BEGIN TRANSACTION;");
|
||||
|
||||
for (id, (embedding, user_id, source_id)) in new_embeddings {
|
||||
let embedding_str = format!(
|
||||
"[{}]",
|
||||
embedding
|
||||
.iter()
|
||||
.map(ToString::to_string)
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
);
|
||||
let mut embedding_str = String::from("[");
|
||||
for (i, f) in embedding.iter().enumerate() {
|
||||
if i > 0 {
|
||||
embedding_str.push(',');
|
||||
}
|
||||
write!(embedding_str, "{f}").unwrap_or_default();
|
||||
}
|
||||
embedding_str.push(']');
|
||||
let embedding = embedding_str;
|
||||
write!(
|
||||
&mut transaction_query,
|
||||
|
||||
@@ -3,7 +3,7 @@ use serde::Deserialize;
|
||||
use std::env;
|
||||
|
||||
/// Selects the embedding backend for vector generation.
|
||||
#[derive(Clone, Deserialize, Debug, Default, PartialEq)]
|
||||
#[derive(Clone, Copy, Deserialize, Debug, Default, PartialEq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum EmbeddingBackend {
|
||||
/// Use OpenAI-compatible API for embeddings.
|
||||
@@ -15,7 +15,7 @@ pub enum EmbeddingBackend {
|
||||
Hashed,
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, PartialEq)]
|
||||
#[derive(Clone, Copy, Deserialize, Debug, PartialEq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum StorageKind {
|
||||
Local,
|
||||
@@ -33,7 +33,7 @@ fn default_s3_region() -> String {
|
||||
}
|
||||
|
||||
/// Selects the strategy used for PDF ingestion.
|
||||
#[derive(Clone, Deserialize, Debug)]
|
||||
#[derive(Clone, Copy, Deserialize, Debug)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum PdfIngestMode {
|
||||
/// Only rely on classic text extraction (no LLM fallbacks).
|
||||
|
||||
Reference in New Issue
Block a user