mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-28 10:29:30 +02:00
perf: avoid small own clones and intermediate Vec allocations
- Derive Copy on 6 small enums (MessageRole, TaskState, StorageKind, EmbeddingBackend, PdfIngestMode, KnowledgeEntityType)
- Change create_ingestion_payload files param from Vec<FileInfo> to &[FileInfo]
- Remove 5 intermediate Vec allocations (4 embedding serialization + 1 format_history) using write! loop
- Remove 7 unnecessary .clone() calls exposed by Copy derive
This commit is contained in:
@@ -73,7 +73,7 @@ pub async fn ingest_data(
|
|||||||
input.content,
|
input.content,
|
||||||
input.context,
|
input.context,
|
||||||
input.category,
|
input.category,
|
||||||
file_infos,
|
&file_infos,
|
||||||
&user_id,
|
&user_id,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ impl StorageManager {
|
|||||||
/// This method validates the configuration and creates the appropriate
|
/// This method validates the configuration and creates the appropriate
|
||||||
/// storage backend with proper initialization.
|
/// storage backend with proper initialization.
|
||||||
pub async fn new(cfg: &AppConfig) -> object_store::Result<Self> {
|
pub async fn new(cfg: &AppConfig) -> object_store::Result<Self> {
|
||||||
let backend_kind = cfg.storage.clone();
|
let backend_kind = cfg.storage;
|
||||||
let (store, local_base) = create_storage_backend(cfg).await?;
|
let (store, local_base) = create_storage_backend(cfg).await?;
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ impl IngestionPayload {
|
|||||||
content: Option<String>,
|
content: Option<String>,
|
||||||
context: String,
|
context: String,
|
||||||
category: String,
|
category: String,
|
||||||
files: Vec<FileInfo>,
|
files: &[FileInfo],
|
||||||
user_id: &str,
|
user_id: &str,
|
||||||
) -> Result<Vec<IngestionPayload>, AppError> {
|
) -> Result<Vec<IngestionPayload>, AppError> {
|
||||||
// Initialize list
|
// Initialize list
|
||||||
@@ -83,7 +83,7 @@ impl IngestionPayload {
|
|||||||
|
|
||||||
for file in files {
|
for file in files {
|
||||||
object_list.push(IngestionPayload::File {
|
object_list.push(IngestionPayload::File {
|
||||||
file_info: file,
|
file_info: file.clone(),
|
||||||
context: context.clone(),
|
context: context.clone(),
|
||||||
category: category.clone(),
|
category: category.clone(),
|
||||||
user_id: user_id.into(),
|
user_id: user_id.into(),
|
||||||
@@ -143,7 +143,7 @@ mod tests {
|
|||||||
Some(url.to_string()),
|
Some(url.to_string()),
|
||||||
context.to_string(),
|
context.to_string(),
|
||||||
category.to_string(),
|
category.to_string(),
|
||||||
files,
|
&files,
|
||||||
user_id,
|
user_id,
|
||||||
)
|
)
|
||||||
.with_context(|| "create_ingestion_payload".to_string())?;
|
.with_context(|| "create_ingestion_payload".to_string())?;
|
||||||
@@ -179,7 +179,7 @@ mod tests {
|
|||||||
Some(text.to_string()),
|
Some(text.to_string()),
|
||||||
context.to_string(),
|
context.to_string(),
|
||||||
category.to_string(),
|
category.to_string(),
|
||||||
files,
|
&files,
|
||||||
user_id,
|
user_id,
|
||||||
)
|
)
|
||||||
.with_context(|| "create_ingestion_payload".to_string())?;
|
.with_context(|| "create_ingestion_payload".to_string())?;
|
||||||
@@ -220,7 +220,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
context.to_string(),
|
context.to_string(),
|
||||||
category.to_string(),
|
category.to_string(),
|
||||||
files,
|
&files,
|
||||||
user_id,
|
user_id,
|
||||||
)
|
)
|
||||||
.with_context(|| "create_ingestion_payload".to_string())?;
|
.with_context(|| "create_ingestion_payload".to_string())?;
|
||||||
@@ -262,7 +262,7 @@ mod tests {
|
|||||||
Some(url.to_string()),
|
Some(url.to_string()),
|
||||||
context.to_string(),
|
context.to_string(),
|
||||||
category.to_string(),
|
category.to_string(),
|
||||||
files,
|
&files,
|
||||||
user_id,
|
user_id,
|
||||||
)
|
)
|
||||||
.with_context(|| "create_ingestion_payload".to_string())?;
|
.with_context(|| "create_ingestion_payload".to_string())?;
|
||||||
@@ -304,7 +304,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
context.to_string(),
|
context.to_string(),
|
||||||
category.to_string(),
|
category.to_string(),
|
||||||
files,
|
&files,
|
||||||
user_id,
|
user_id,
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -330,7 +330,7 @@ mod tests {
|
|||||||
Some(text.to_string()),
|
Some(text.to_string()),
|
||||||
context.to_string(),
|
context.to_string(),
|
||||||
category.to_string(),
|
category.to_string(),
|
||||||
files,
|
&files,
|
||||||
user_id,
|
user_id,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ pub const MAX_ATTEMPTS: u32 = 3;
|
|||||||
pub const DEFAULT_LEASE_SECS: i64 = 300;
|
pub const DEFAULT_LEASE_SECS: i64 = 300;
|
||||||
pub const DEFAULT_PRIORITY: i32 = 0;
|
pub const DEFAULT_PRIORITY: i32 = 0;
|
||||||
|
|
||||||
#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
|
#[derive(Debug, Default, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
|
||||||
pub enum TaskState {
|
pub enum TaskState {
|
||||||
#[serde(rename = "Pending")]
|
#[serde(rename = "Pending")]
|
||||||
#[default]
|
#[default]
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
clippy::redundant_closure_for_method_calls
|
clippy::redundant_closure_for_method_calls
|
||||||
)]
|
)]
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::fmt::Write;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
error::AppError, storage::db::SurrealDbClient,
|
error::AppError, storage::db::SurrealDbClient,
|
||||||
@@ -23,7 +24,7 @@ use tokio_retry::{
|
|||||||
use tracing::{error, info};
|
use tracing::{error, info};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
|
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
|
||||||
pub enum KnowledgeEntityType {
|
pub enum KnowledgeEntityType {
|
||||||
Idea,
|
Idea,
|
||||||
Project,
|
Project,
|
||||||
@@ -402,14 +403,14 @@ impl KnowledgeEntity {
|
|||||||
|
|
||||||
// Add all update statements to the embedding table
|
// Add all update statements to the embedding table
|
||||||
for (id, (embedding, user_id)) in new_embeddings {
|
for (id, (embedding, user_id)) in new_embeddings {
|
||||||
let embedding_str = format!(
|
let mut embedding_str = String::from("[");
|
||||||
"[{}]",
|
for (i, f) in embedding.iter().enumerate() {
|
||||||
embedding
|
if i > 0 {
|
||||||
.iter()
|
embedding_str.push(',');
|
||||||
.map(|f| f.to_string())
|
}
|
||||||
.collect::<Vec<_>>()
|
write!(embedding_str, "{f}").unwrap_or_default();
|
||||||
.join(",")
|
}
|
||||||
);
|
embedding_str.push(']');
|
||||||
transaction_query.push_str(&format!(
|
transaction_query.push_str(&format!(
|
||||||
"UPSERT type::thing('knowledge_entity_embedding', '{id}') SET \
|
"UPSERT type::thing('knowledge_entity_embedding', '{id}') SET \
|
||||||
entity_id = type::thing('knowledge_entity', '{id}'), \
|
entity_id = type::thing('knowledge_entity', '{id}'), \
|
||||||
@@ -528,14 +529,14 @@ impl KnowledgeEntity {
|
|||||||
let mut transaction_query = String::from("BEGIN TRANSACTION;");
|
let mut transaction_query = String::from("BEGIN TRANSACTION;");
|
||||||
|
|
||||||
for (id, (embedding, user_id)) in new_embeddings {
|
for (id, (embedding, user_id)) in new_embeddings {
|
||||||
let embedding_str = format!(
|
let mut embedding_str = String::from("[");
|
||||||
"[{}]",
|
for (i, f) in embedding.iter().enumerate() {
|
||||||
embedding
|
if i > 0 {
|
||||||
.iter()
|
embedding_str.push(',');
|
||||||
.map(|f| f.to_string())
|
}
|
||||||
.collect::<Vec<_>>()
|
write!(embedding_str, "{f}").unwrap_or_default();
|
||||||
.join(",")
|
}
|
||||||
);
|
embedding_str.push(']');
|
||||||
transaction_query.push_str(&format!(
|
transaction_query.push_str(&format!(
|
||||||
"CREATE type::thing('knowledge_entity_embedding', '{id}') SET \
|
"CREATE type::thing('knowledge_entity_embedding', '{id}') SET \
|
||||||
entity_id = type::thing('knowledge_entity', '{id}'), \
|
entity_id = type::thing('knowledge_entity', '{id}'), \
|
||||||
@@ -590,7 +591,7 @@ mod tests {
|
|||||||
source_id.clone(),
|
source_id.clone(),
|
||||||
name.clone(),
|
name.clone(),
|
||||||
description.clone(),
|
description.clone(),
|
||||||
entity_type.clone(),
|
entity_type,
|
||||||
metadata.clone(),
|
metadata.clone(),
|
||||||
user_id.clone(),
|
user_id.clone(),
|
||||||
);
|
);
|
||||||
@@ -682,7 +683,7 @@ mod tests {
|
|||||||
source_id.clone(),
|
source_id.clone(),
|
||||||
"Entity 1".to_string(),
|
"Entity 1".to_string(),
|
||||||
"Description 1".to_string(),
|
"Description 1".to_string(),
|
||||||
entity_type.clone(),
|
entity_type,
|
||||||
None,
|
None,
|
||||||
user_id.clone(),
|
user_id.clone(),
|
||||||
);
|
);
|
||||||
@@ -691,7 +692,7 @@ mod tests {
|
|||||||
source_id.clone(),
|
source_id.clone(),
|
||||||
"Entity 2".to_string(),
|
"Entity 2".to_string(),
|
||||||
"Description 2".to_string(),
|
"Description 2".to_string(),
|
||||||
entity_type.clone(),
|
entity_type,
|
||||||
None,
|
None,
|
||||||
user_id.clone(),
|
user_id.clone(),
|
||||||
);
|
);
|
||||||
@@ -701,7 +702,7 @@ mod tests {
|
|||||||
different_source_id.clone(),
|
different_source_id.clone(),
|
||||||
"Different Entity".to_string(),
|
"Different Entity".to_string(),
|
||||||
"Different Description".to_string(),
|
"Different Description".to_string(),
|
||||||
entity_type.clone(),
|
entity_type,
|
||||||
None,
|
None,
|
||||||
user_id.clone(),
|
user_id.clone(),
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
#![allow(clippy::module_name_repetitions)]
|
#![allow(clippy::module_name_repetitions)]
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use std::fmt;
|
use std::fmt::Write;
|
||||||
|
|
||||||
use crate::stored_object;
|
use crate::stored_object;
|
||||||
|
|
||||||
#[derive(Deserialize, Debug, Clone, Serialize, PartialEq)]
|
#[derive(Deserialize, Debug, Clone, Copy, Serialize, PartialEq)]
|
||||||
pub enum MessageRole {
|
pub enum MessageRole {
|
||||||
User,
|
User,
|
||||||
AI,
|
AI,
|
||||||
@@ -57,11 +57,14 @@ impl fmt::Display for Message {
|
|||||||
|
|
||||||
// helper function to format a vector of messages
|
// helper function to format a vector of messages
|
||||||
pub fn format_history(history: &[Message]) -> String {
|
pub fn format_history(history: &[Message]) -> String {
|
||||||
history
|
let mut out = String::new();
|
||||||
.iter()
|
for (i, msg) in history.iter().enumerate() {
|
||||||
.map(|msg| format!("{msg}"))
|
if i > 0 {
|
||||||
.collect::<Vec<String>>()
|
out.push('\n');
|
||||||
.join("\n")
|
}
|
||||||
|
write!(out, "{msg}").unwrap_or_default();
|
||||||
|
}
|
||||||
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -79,7 +82,7 @@ mod tests {
|
|||||||
|
|
||||||
let message = Message::new(
|
let message = Message::new(
|
||||||
conversation_id.to_string(),
|
conversation_id.to_string(),
|
||||||
role.clone(),
|
role,
|
||||||
content.to_string(),
|
content.to_string(),
|
||||||
references.clone(),
|
references.clone(),
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -288,14 +288,14 @@ impl TextChunk {
|
|||||||
let mut transaction_query = String::from("BEGIN TRANSACTION;");
|
let mut transaction_query = String::from("BEGIN TRANSACTION;");
|
||||||
|
|
||||||
for (id, (embedding, user_id, source_id)) in new_embeddings {
|
for (id, (embedding, user_id, source_id)) in new_embeddings {
|
||||||
let embedding_str = format!(
|
let mut embedding_str = String::from("[");
|
||||||
"[{}]",
|
for (i, f) in embedding.iter().enumerate() {
|
||||||
embedding
|
if i > 0 {
|
||||||
.iter()
|
embedding_str.push(',');
|
||||||
.map(ToString::to_string)
|
}
|
||||||
.collect::<Vec<_>>()
|
write!(embedding_str, "{f}").unwrap_or_default();
|
||||||
.join(",")
|
}
|
||||||
);
|
embedding_str.push(']');
|
||||||
// Use the chunk id as the embedding record id to keep a 1:1 mapping
|
// Use the chunk id as the embedding record id to keep a 1:1 mapping
|
||||||
let embedding = embedding_str;
|
let embedding = embedding_str;
|
||||||
write!(
|
write!(
|
||||||
@@ -407,14 +407,14 @@ impl TextChunk {
|
|||||||
let mut transaction_query = String::from("BEGIN TRANSACTION;");
|
let mut transaction_query = String::from("BEGIN TRANSACTION;");
|
||||||
|
|
||||||
for (id, (embedding, user_id, source_id)) in new_embeddings {
|
for (id, (embedding, user_id, source_id)) in new_embeddings {
|
||||||
let embedding_str = format!(
|
let mut embedding_str = String::from("[");
|
||||||
"[{}]",
|
for (i, f) in embedding.iter().enumerate() {
|
||||||
embedding
|
if i > 0 {
|
||||||
.iter()
|
embedding_str.push(',');
|
||||||
.map(ToString::to_string)
|
}
|
||||||
.collect::<Vec<_>>()
|
write!(embedding_str, "{f}").unwrap_or_default();
|
||||||
.join(",")
|
}
|
||||||
);
|
embedding_str.push(']');
|
||||||
let embedding = embedding_str;
|
let embedding = embedding_str;
|
||||||
write!(
|
write!(
|
||||||
&mut transaction_query,
|
&mut transaction_query,
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ use serde::Deserialize;
|
|||||||
use std::env;
|
use std::env;
|
||||||
|
|
||||||
/// Selects the embedding backend for vector generation.
|
/// Selects the embedding backend for vector generation.
|
||||||
#[derive(Clone, Deserialize, Debug, Default, PartialEq)]
|
#[derive(Clone, Copy, Deserialize, Debug, Default, PartialEq)]
|
||||||
#[serde(rename_all = "lowercase")]
|
#[serde(rename_all = "lowercase")]
|
||||||
pub enum EmbeddingBackend {
|
pub enum EmbeddingBackend {
|
||||||
/// Use OpenAI-compatible API for embeddings.
|
/// Use OpenAI-compatible API for embeddings.
|
||||||
@@ -15,7 +15,7 @@ pub enum EmbeddingBackend {
|
|||||||
Hashed,
|
Hashed,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Deserialize, Debug, PartialEq)]
|
#[derive(Clone, Copy, Deserialize, Debug, PartialEq)]
|
||||||
#[serde(rename_all = "lowercase")]
|
#[serde(rename_all = "lowercase")]
|
||||||
pub enum StorageKind {
|
pub enum StorageKind {
|
||||||
Local,
|
Local,
|
||||||
@@ -33,7 +33,7 @@ fn default_s3_region() -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Selects the strategy used for PDF ingestion.
|
/// Selects the strategy used for PDF ingestion.
|
||||||
#[derive(Clone, Deserialize, Debug)]
|
#[derive(Clone, Copy, Deserialize, Debug)]
|
||||||
#[serde(rename_all = "kebab-case")]
|
#[serde(rename_all = "kebab-case")]
|
||||||
pub enum PdfIngestMode {
|
pub enum PdfIngestMode {
|
||||||
/// Only rely on classic text extraction (no LLM fallbacks).
|
/// Only rely on classic text extraction (no LLM fallbacks).
|
||||||
|
|||||||
@@ -142,7 +142,7 @@ pub async fn process_ingest_form(
|
|||||||
input.content,
|
input.content,
|
||||||
input.context,
|
input.context,
|
||||||
input.category,
|
input.category,
|
||||||
file_infos,
|
&file_infos,
|
||||||
user.id.as_str(),
|
user.id.as_str(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user