mirror of
https://github.com/perstarkse/minne.git
synced 2026-03-27 11:51:37 +01:00
Removed the `IngressInput` struct and made naming consistent
This commit is contained in:
@@ -2,7 +2,7 @@ use crate::{
|
||||
error::AppError,
|
||||
ingress::analysis::prompt::{get_ingress_analysis_schema, INGRESS_ANALYSIS_SYSTEM_MESSAGE},
|
||||
retrieval::combined_knowledge_entity_retrieval,
|
||||
storage::types::knowledge_entity::KnowledgeEntity,
|
||||
storage::{db::SurrealDbClient, types::knowledge_entity::KnowledgeEntity},
|
||||
};
|
||||
use async_openai::{
|
||||
error::OpenAIError,
|
||||
@@ -13,20 +13,18 @@ use async_openai::{
|
||||
},
|
||||
};
|
||||
use serde_json::json;
|
||||
use surrealdb::engine::any::Any;
|
||||
use surrealdb::Surreal;
|
||||
use tracing::debug;
|
||||
|
||||
use super::types::llm_analysis_result::LLMGraphAnalysisResult;
|
||||
|
||||
pub struct IngressAnalyzer<'a> {
|
||||
db_client: &'a Surreal<Any>,
|
||||
db_client: &'a SurrealDbClient,
|
||||
openai_client: &'a async_openai::Client<async_openai::config::OpenAIConfig>,
|
||||
}
|
||||
|
||||
impl<'a> IngressAnalyzer<'a> {
|
||||
pub fn new(
|
||||
db_client: &'a Surreal<Any>,
|
||||
db_client: &'a SurrealDbClient,
|
||||
openai_client: &'a async_openai::Client<async_openai::config::OpenAIConfig>,
|
||||
) -> Self {
|
||||
Self {
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
use super::ingress_object::IngressObject;
|
||||
use crate::{error::AppError, storage::types::file_info::FileInfo};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::info;
|
||||
use url::Url;
|
||||
|
||||
/// Struct defining the expected body when ingressing content.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct IngressInput {
|
||||
pub content: Option<String>,
|
||||
pub instructions: String,
|
||||
pub category: String,
|
||||
pub files: Vec<FileInfo>,
|
||||
}
|
||||
|
||||
/// Function to create ingress objects from input.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `input` - IngressInput containing information needed to ingress content.
|
||||
/// * `user_id` - User id of the ingressing user
|
||||
///
|
||||
/// # Returns
|
||||
/// * `Vec<IngressObject>` - An array containing the ingressed objects, one file/contenttype per object.
|
||||
pub fn create_ingress_objects(
|
||||
input: IngressInput,
|
||||
user_id: &str,
|
||||
) -> Result<Vec<IngressObject>, AppError> {
|
||||
// Initialize list
|
||||
let mut object_list = Vec::new();
|
||||
|
||||
// Create a IngressObject from input.content if it exists, checking for URL or text
|
||||
if let Some(input_content) = input.content {
|
||||
match Url::parse(&input_content) {
|
||||
Ok(url) => {
|
||||
info!("Detected URL: {}", url);
|
||||
object_list.push(IngressObject::Url {
|
||||
url: url.to_string(),
|
||||
instructions: input.instructions.clone(),
|
||||
category: input.category.clone(),
|
||||
user_id: user_id.into(),
|
||||
});
|
||||
}
|
||||
Err(_) => {
|
||||
if input_content.len() > 2 {
|
||||
info!("Treating input as plain text");
|
||||
object_list.push(IngressObject::Text {
|
||||
text: input_content.to_string(),
|
||||
instructions: input.instructions.clone(),
|
||||
category: input.category.clone(),
|
||||
user_id: user_id.into(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for file in input.files {
|
||||
object_list.push(IngressObject::File {
|
||||
file_info: file,
|
||||
instructions: input.instructions.clone(),
|
||||
category: input.category.clone(),
|
||||
user_id: user_id.into(),
|
||||
})
|
||||
}
|
||||
|
||||
// If no objects are constructed, we return Err
|
||||
if object_list.is_empty() {
|
||||
return Err(AppError::NotFound(
|
||||
"No valid content or files provided".into(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(object_list)
|
||||
}
|
||||
@@ -13,6 +13,8 @@ use scraper::{Html, Selector};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::Write;
|
||||
use tiktoken_rs::{o200k_base, CoreBPE};
|
||||
use tracing::info;
|
||||
use url::Url;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub enum IngressObject {
|
||||
@@ -37,6 +39,72 @@ pub enum IngressObject {
|
||||
}
|
||||
|
||||
impl IngressObject {
|
||||
/// Creates ingress objects from the provided content, instructions, and files.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `content` - Optional textual content to be ingressed
|
||||
/// * `instructions` - Instructions for processing the ingress content
|
||||
/// * `category` - Category to classify the ingressed content
|
||||
/// * `files` - Vector of `FileInfo` objects containing information about uploaded files
|
||||
/// * `user_id` - Identifier of the user performing the ingress operation
|
||||
///
|
||||
/// # Returns
|
||||
/// * `Result<Vec<IngressObject>, AppError>` - On success, returns a vector of ingress objects
|
||||
/// (one per file/content type). On failure, returns an `AppError`.
|
||||
pub fn create_ingress_objects(
|
||||
content: Option<String>,
|
||||
instructions: String,
|
||||
category: String,
|
||||
files: Vec<FileInfo>,
|
||||
user_id: &str,
|
||||
) -> Result<Vec<IngressObject>, AppError> {
|
||||
// Initialize list
|
||||
let mut object_list = Vec::new();
|
||||
|
||||
// Create a IngressObject from content if it exists, checking for URL or text
|
||||
if let Some(input_content) = content {
|
||||
match Url::parse(&input_content) {
|
||||
Ok(url) => {
|
||||
info!("Detected URL: {}", url);
|
||||
object_list.push(IngressObject::Url {
|
||||
url: url.to_string(),
|
||||
instructions: instructions.clone(),
|
||||
category: category.clone(),
|
||||
user_id: user_id.into(),
|
||||
});
|
||||
}
|
||||
Err(_) => {
|
||||
if input_content.len() > 2 {
|
||||
info!("Treating input as plain text");
|
||||
object_list.push(IngressObject::Text {
|
||||
text: input_content.to_string(),
|
||||
instructions: instructions.clone(),
|
||||
category: category.clone(),
|
||||
user_id: user_id.into(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for file in files {
|
||||
object_list.push(IngressObject::File {
|
||||
file_info: file,
|
||||
instructions: instructions.clone(),
|
||||
category: category.clone(),
|
||||
user_id: user_id.into(),
|
||||
})
|
||||
}
|
||||
|
||||
// If no objects are constructed, we return Err
|
||||
if object_list.is_empty() {
|
||||
return Err(AppError::NotFound(
|
||||
"No valid content or files provided".into(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(object_list)
|
||||
}
|
||||
/// Creates a new `TextContent` instance from a `IngressObject`.
|
||||
///
|
||||
/// # Arguments
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
pub mod analysis;
|
||||
pub mod content_processor;
|
||||
pub mod ingress_input;
|
||||
pub mod ingress_object;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use surrealdb::{engine::any::Any, Error, Surreal};
|
||||
use surrealdb::Error;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::storage::types::{knowledge_entity::KnowledgeEntity, StoredObject};
|
||||
use crate::storage::{db::SurrealDbClient, types::knowledge_entity::KnowledgeEntity};
|
||||
|
||||
/// Retrieves database entries that match a specific source identifier.
|
||||
///
|
||||
@@ -33,15 +33,14 @@ use crate::storage::types::{knowledge_entity::KnowledgeEntity, StoredObject};
|
||||
pub async fn find_entities_by_source_ids<T>(
|
||||
source_id: Vec<String>,
|
||||
table_name: String,
|
||||
db_client: &Surreal<Any>,
|
||||
db: &SurrealDbClient,
|
||||
) -> Result<Vec<T>, Error>
|
||||
where
|
||||
T: for<'de> serde::Deserialize<'de>,
|
||||
{
|
||||
let query = "SELECT * FROM type::table($table) WHERE source_id IN $source_ids";
|
||||
|
||||
db_client
|
||||
.query(query)
|
||||
db.query(query)
|
||||
.bind(("table", table_name))
|
||||
.bind(("source_ids", source_id))
|
||||
.await?
|
||||
@@ -50,7 +49,7 @@ where
|
||||
|
||||
/// Find entities by their relationship to the id
|
||||
pub async fn find_entities_by_relationship_by_id(
|
||||
db_client: &Surreal<Any>,
|
||||
db: &SurrealDbClient,
|
||||
entity_id: String,
|
||||
) -> Result<Vec<KnowledgeEntity>, Error> {
|
||||
let query = format!(
|
||||
@@ -60,15 +59,5 @@ pub async fn find_entities_by_relationship_by_id(
|
||||
|
||||
debug!("{}", query);
|
||||
|
||||
db_client.query(query).await?.take(0)
|
||||
}
|
||||
|
||||
/// Get a specific KnowledgeEntity by its id
|
||||
pub async fn get_entity_by_id(
|
||||
db_client: &Surreal<Any>,
|
||||
entity_id: &str,
|
||||
) -> Result<Option<KnowledgeEntity>, Error> {
|
||||
db_client
|
||||
.select((KnowledgeEntity::table_name(), entity_id))
|
||||
.await
|
||||
db.query(query).await?.take(0)
|
||||
}
|
||||
|
||||
@@ -9,11 +9,13 @@ use crate::{
|
||||
graph::{find_entities_by_relationship_by_id, find_entities_by_source_ids},
|
||||
vector::find_items_by_vector_similarity,
|
||||
},
|
||||
storage::types::{knowledge_entity::KnowledgeEntity, text_chunk::TextChunk},
|
||||
storage::{
|
||||
db::SurrealDbClient,
|
||||
types::{knowledge_entity::KnowledgeEntity, text_chunk::TextChunk},
|
||||
},
|
||||
};
|
||||
use futures::future::{try_join, try_join_all};
|
||||
use std::collections::HashMap;
|
||||
use surrealdb::{engine::any::Any, Surreal};
|
||||
|
||||
/// Performs a comprehensive knowledge entity retrieval using multiple search strategies
|
||||
/// to find the most relevant entities for a given query.
|
||||
@@ -37,7 +39,7 @@ use surrealdb::{engine::any::Any, Surreal};
|
||||
/// * `Result<Vec<KnowledgeEntity>, AppError>` - A deduplicated vector of relevant
|
||||
/// knowledge entities, or an error if the retrieval process fails
|
||||
pub async fn combined_knowledge_entity_retrieval(
|
||||
db_client: &Surreal<Any>,
|
||||
db_client: &SurrealDbClient,
|
||||
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
|
||||
query: &str,
|
||||
user_id: &str,
|
||||
|
||||
Reference in New Issue
Block a user