mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-27 18:09:28 +02:00
refactor: single object queue
This commit is contained in:
@@ -1,94 +0,0 @@
|
||||
use std::hash::Hash;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
use tracing::info;
|
||||
use url::Url;
|
||||
use uuid::Uuid;
|
||||
use crate::redis::client::RedisClient;
|
||||
|
||||
use super::file_info::FileInfo;
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub enum Content {
|
||||
Url(String),
|
||||
Text(String),
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct IngressInput {
|
||||
pub content: Option<String>,
|
||||
pub instructions: String,
|
||||
pub category: String,
|
||||
pub files: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct IngressContent {
|
||||
pub content: Option<Content>,
|
||||
pub instructions: String,
|
||||
pub category: String,
|
||||
pub files: Option<Vec<FileInfo>>,
|
||||
}
|
||||
|
||||
/// Error types for file and content handling.
|
||||
#[derive(Error, Debug)]
|
||||
pub enum IngressContentError {
|
||||
#[error("IO error occurred: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
#[error("UTF-8 conversion error: {0}")]
|
||||
Utf8(#[from] std::string::FromUtf8Error),
|
||||
|
||||
#[error("MIME type detection failed for input: {0}")]
|
||||
MimeDetection(String),
|
||||
|
||||
#[error("Unsupported MIME type: {0}")]
|
||||
UnsupportedMime(String),
|
||||
|
||||
#[error("URL parse error: {0}")]
|
||||
UrlParse(#[from] url::ParseError),
|
||||
}
|
||||
|
||||
impl IngressContent {
|
||||
/// Create a new `IngressContent` from `IngressInput`.
|
||||
pub async fn new(
|
||||
input: IngressInput,
|
||||
redis_client: &RedisClient, // Add RedisClient as a parameter
|
||||
) -> Result<IngressContent, IngressContentError> {
|
||||
let content = if let Some(input_content) = input.content {
|
||||
// Check if the content is a URL
|
||||
if let Ok(url) = Url::parse(&input_content) {
|
||||
info!("Detected URL: {}", url);
|
||||
Some(Content::Url(url.to_string()))
|
||||
} else {
|
||||
info!("Treating input as plain text");
|
||||
Some(Content::Text(input_content))
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Fetch file information if file UUIDs are provided
|
||||
let files = if let Some(file_uuids) = input.files {
|
||||
let mut file_info_list = Vec::new();
|
||||
for uuid_str in file_uuids {
|
||||
let uuid = Uuid::parse_str(&uuid_str).map_err(|_| IngressContentError::UnsupportedMime("Invalid UUID".into()))?;
|
||||
match FileInfo::get(uuid, redis_client).await {
|
||||
Ok(file_info) => file_info_list.push(file_info),
|
||||
Err(_) => info!("No file with that uuid"),
|
||||
}
|
||||
}
|
||||
Some(file_info_list)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(IngressContent {
|
||||
content,
|
||||
instructions: input.instructions,
|
||||
category: input.category,
|
||||
files,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
use tracing::info;
|
||||
use url::Url;
|
||||
use uuid::Uuid;
|
||||
use crate::redis::client::RedisClient;
|
||||
|
||||
use super::{file_info::FileInfo, ingress_object::IngressObject };
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct IngressInput {
|
||||
pub content: Option<String>,
|
||||
pub instructions: String,
|
||||
pub category: String,
|
||||
pub files: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
/// Error types for processing ingress content.
|
||||
#[derive(Error, Debug)]
|
||||
pub enum IngressContentError {
|
||||
#[error("IO error occurred: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
#[error("UTF-8 conversion error: {0}")]
|
||||
Utf8(#[from] std::string::FromUtf8Error),
|
||||
|
||||
#[error("MIME type detection failed for input: {0}")]
|
||||
MimeDetection(String),
|
||||
|
||||
#[error("Unsupported MIME type: {0}")]
|
||||
UnsupportedMime(String),
|
||||
|
||||
#[error("URL parse error: {0}")]
|
||||
UrlParse(#[from] url::ParseError),
|
||||
|
||||
#[error("UUID parse error: {0}")]
|
||||
UuidParse(#[from] uuid::Error),
|
||||
|
||||
#[error("Redis error: {0}")]
|
||||
RedisError(String),
|
||||
}
|
||||
|
||||
/// Function to create ingress objects from input.
|
||||
pub async fn create_ingress_objects(
|
||||
input: IngressInput,
|
||||
redis_client: &RedisClient,
|
||||
) -> Result<Vec<IngressObject>, IngressContentError> {
|
||||
let mut object_list = Vec::new();
|
||||
|
||||
if let Some(input_content) = input.content {
|
||||
match Url::parse(&input_content) {
|
||||
Ok(url) => {
|
||||
info!("Detected URL: {}", url);
|
||||
object_list.push(IngressObject::Url {
|
||||
url: url.to_string(),
|
||||
instructions: input.instructions.clone(),
|
||||
category: input.category.clone(),
|
||||
});
|
||||
}
|
||||
Err(_) => {
|
||||
info!("Treating input as plain text");
|
||||
object_list.push(IngressObject::Text {
|
||||
text: input_content.to_string(),
|
||||
instructions: input.instructions.clone(),
|
||||
category: input.category.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(file_uuids) = input.files {
|
||||
for uuid_str in file_uuids {
|
||||
let uuid = Uuid::parse_str(&uuid_str)?;
|
||||
match FileInfo::get(uuid, redis_client).await {
|
||||
Ok(file_info) => {
|
||||
object_list.push(IngressObject::File {
|
||||
file_info,
|
||||
instructions: input.instructions.clone(),
|
||||
category: input.category.clone(),
|
||||
});
|
||||
}
|
||||
Err(_) => {
|
||||
info!("No file with UUID: {}", uuid);
|
||||
// Optionally, you can collect errors or continue silently
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if object_list.is_empty() {
|
||||
return Err(IngressContentError::MimeDetection(
|
||||
"No valid content or files provided".into(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(object_list)
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
use crate::models::file_info::FileInfo;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{ingress_content::IngressContentError, text_content::TextContent};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub enum IngressObject {
|
||||
Url {
|
||||
url: String,
|
||||
instructions: String,
|
||||
category: String,
|
||||
},
|
||||
Text {
|
||||
text: String,
|
||||
instructions: String,
|
||||
category: String,
|
||||
},
|
||||
File {
|
||||
file_info: FileInfo,
|
||||
instructions: String,
|
||||
category: String,
|
||||
},
|
||||
}
|
||||
impl IngressObject {
|
||||
pub async fn to_text_content(&self) -> Result<TextContent, IngressContentError> {
|
||||
match self {
|
||||
IngressObject::Url { url, instructions, category } => {
|
||||
let text = Self::fetch_text_from_url(url).await?;
|
||||
Ok(TextContent {
|
||||
text,
|
||||
instructions: instructions.clone(),
|
||||
category: category.clone(),
|
||||
file_info: None,
|
||||
})
|
||||
},
|
||||
IngressObject::Text { text, instructions, category } => {
|
||||
Ok(TextContent {
|
||||
text: text.clone(),
|
||||
instructions: instructions.clone(),
|
||||
category: category.clone(),
|
||||
file_info: None,
|
||||
})
|
||||
},
|
||||
IngressObject::File { file_info, instructions, category } => {
|
||||
let text = Self::extract_text_from_file(file_info).await?;
|
||||
Ok(TextContent {
|
||||
text,
|
||||
instructions: instructions.clone(),
|
||||
category: category.clone(),
|
||||
file_info: Some(file_info.clone()),
|
||||
})
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetches and extracts text from a URL.
|
||||
async fn fetch_text_from_url(url: &str) -> Result<String, IngressContentError> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Extracts text from a file based on its MIME type.
|
||||
async fn extract_text_from_file(file_info: &FileInfo) -> Result<String, IngressContentError> {
|
||||
match file_info.mime_type.as_str() {
|
||||
"text/plain" => {
|
||||
// Read the file and return its content
|
||||
let content = tokio::fs::read_to_string(&file_info.path).await?;
|
||||
Ok(content)
|
||||
}
|
||||
"application/pdf" => {
|
||||
// TODO: Implement PDF text extraction using a crate like `pdf-extract` or `lopdf`
|
||||
Err(IngressContentError::UnsupportedMime(file_info.mime_type.clone()))
|
||||
}
|
||||
"image/png" | "image/jpeg" => {
|
||||
// TODO: Implement OCR on image using a crate like `tesseract`
|
||||
Err(IngressContentError::UnsupportedMime(file_info.mime_type.clone()))
|
||||
}
|
||||
// Handle other MIME types as needed
|
||||
_ => Err(IngressContentError::UnsupportedMime(file_info.mime_type.clone())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+3
-1
@@ -1,2 +1,4 @@
|
||||
pub mod file_info;
|
||||
pub mod ingress;
|
||||
pub mod ingress_content;
|
||||
pub mod ingress_object;
|
||||
pub mod text_content;
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use crate::models::file_info::FileInfo;
|
||||
use thiserror::Error;
|
||||
|
||||
/// Represents a single piece of text content extracted from various sources.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct TextContent {
|
||||
pub text: String,
|
||||
pub file_info: Option<FileInfo>,
|
||||
pub instructions: String,
|
||||
pub category: String,
|
||||
}
|
||||
|
||||
impl TextContent {
|
||||
/// Creates a new `TextContent` instance.
|
||||
pub fn new(text: String, file_info: Option<FileInfo>, instructions: String, category: String) -> Self {
|
||||
Self {
|
||||
text,
|
||||
file_info,
|
||||
instructions,
|
||||
category,
|
||||
}
|
||||
}
|
||||
|
||||
/// Processes the `TextContent` by sending it to an LLM, storing in a graph DB, and vector DB.
|
||||
pub async fn process(&self) -> Result<(), ProcessingError> {
|
||||
// Step 1: Send to LLM for analysis
|
||||
let analysis = self.send_to_llm().await?;
|
||||
|
||||
// Step 2: Store analysis results in Graph DB
|
||||
self.store_in_graph_db(&analysis).await?;
|
||||
|
||||
// Step 3: Split text and store in Vector DB
|
||||
self.store_in_vector_db().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Sends text to an LLM for analysis.
|
||||
async fn send_to_llm(&self) -> Result<LLMAnalysis, ProcessingError> {
|
||||
// TODO: Implement interaction with your specific LLM API.
|
||||
// Example using reqwest:
|
||||
/*
|
||||
let client = reqwest::Client::new();
|
||||
let response = client.post("http://llm-api/analyze")
|
||||
.json(&serde_json::json!({ "text": self.text }))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| ProcessingError::LLMError(e.to_string()))?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(ProcessingError::LLMError(format!("LLM API returned status: {}", response.status())));
|
||||
}
|
||||
|
||||
let analysis: LLMAnalysis = response.json().await
|
||||
.map_err(|e| ProcessingError::LLMError(e.to_string()))?;
|
||||
|
||||
Ok(analysis)
|
||||
*/
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Stores analysis results in a graph database.
|
||||
async fn store_in_graph_db(&self, analysis: &LLMAnalysis) -> Result<(), ProcessingError> {
|
||||
// TODO: Implement storage logic for your specific graph database.
|
||||
// Example:
|
||||
/*
|
||||
let graph_db = GraphDB::new("http://graph-db:8080");
|
||||
graph_db.insert_analysis(analysis).await.map_err(|e| ProcessingError::GraphDBError(e.to_string()))?;
|
||||
*/
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Splits text and stores it in a vector database.
|
||||
async fn store_in_vector_db(&self) -> Result<(), ProcessingError> {
|
||||
// TODO: Implement text splitting and vector storage logic.
|
||||
// Example:
|
||||
/*
|
||||
let chunks = text_splitter::split(&self.text);
|
||||
let vector_db = VectorDB::new("http://vector-db:5000");
|
||||
for chunk in chunks {
|
||||
vector_db.insert(chunk).await.map_err(|e| ProcessingError::VectorDBError(e.to_string()))?;
|
||||
}
|
||||
*/
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents the analysis results from the LLM.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct LLMAnalysis {
|
||||
pub entities: Vec<String>,
|
||||
pub summary: String,
|
||||
// Add other fields based on your LLM's output.
|
||||
}
|
||||
|
||||
/// Error types for processing `TextContent`.
|
||||
#[derive(Error, Debug)]
|
||||
pub enum ProcessingError {
|
||||
#[error("LLM processing error: {0}")]
|
||||
LLMError(String),
|
||||
|
||||
#[error("Graph DB storage error: {0}")]
|
||||
GraphDBError(String),
|
||||
|
||||
#[error("Vector DB storage error: {0}")]
|
||||
VectorDBError(String),
|
||||
|
||||
#[error("Unknown processing error")]
|
||||
Unknown,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user