mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-23 08:07:41 +02:00
comments, llm schema
This commit is contained in:
+1
-14
@@ -24,6 +24,7 @@ pub struct FileInfo {
|
||||
pub mime_type: String,
|
||||
}
|
||||
|
||||
/// Errors that can occur during FileInfo operations
|
||||
#[derive(Error, Debug)]
|
||||
pub enum FileError {
|
||||
#[error("IO error occurred: {0}")]
|
||||
@@ -98,11 +99,9 @@ impl FileInfo {
|
||||
/// Creates a new `FileInfo` instance from uploaded field data.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `field_data` - The uploaded file data.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Result<FileInfo, FileError>` - The created `FileInfo` or an error.
|
||||
pub async fn new(field_data: FieldData<NamedTempFile>, redis_client: &RedisClient) -> Result<FileInfo, FileError> {
|
||||
let file = field_data.contents; // NamedTempFile
|
||||
@@ -157,11 +156,9 @@ impl FileInfo {
|
||||
/// Retrieves `FileInfo` based on UUID.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `uuid` - The UUID of the file.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Result<FileInfo, FileError>` - The `FileInfo` or an error.
|
||||
pub async fn get(uuid: Uuid, redis_client: &RedisClient) -> Result<FileInfo, FileError> {
|
||||
// Fetch SHA256 from UUID mapping
|
||||
@@ -178,13 +175,11 @@ impl FileInfo {
|
||||
/// Updates an existing file identified by UUID with new file data.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `uuid` - The UUID of the file to update.
|
||||
/// * `new_field_data` - The new file data.
|
||||
/// * `redis_client` - Reference to the RedisClient.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Result<FileInfo, FileError>` - The updated `FileInfo` or an error.
|
||||
pub async fn update(uuid: Uuid, new_field_data: FieldData<NamedTempFile>, redis_client: &RedisClient) -> Result<FileInfo, FileError> {
|
||||
let new_file = new_field_data.contents;
|
||||
@@ -237,12 +232,10 @@ impl FileInfo {
|
||||
/// Deletes a file and its corresponding metadata based on UUID.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `uuid` - The UUID of the file to delete.
|
||||
/// * `redis_client` - Reference to the RedisClient.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Result<(), FileError>` - Empty result or an error.
|
||||
pub async fn delete(uuid: Uuid, redis_client: &RedisClient) -> Result<(), FileError> {
|
||||
// Retrieve FileInfo to get SHA256 and path
|
||||
@@ -279,13 +272,11 @@ impl FileInfo {
|
||||
/// Persists the file to the filesystem under `./data/{uuid}/{file_name}`.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `uuid` - The UUID of the file.
|
||||
/// * `file` - The temporary file to persist.
|
||||
/// * `file_name` - The sanitized file name.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Result<PathBuf, FileError>` - The persisted file path or an error.
|
||||
async fn persist_file(uuid: &Uuid, file: NamedTempFile, file_name: &str) -> Result<PathBuf, FileError> {
|
||||
let base_dir = Path::new("./data");
|
||||
@@ -309,11 +300,9 @@ impl FileInfo {
|
||||
/// Calculates the SHA256 hash of the given file.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `file` - The file to hash.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Result<String, FileError>` - The SHA256 hash as a hex string or an error.
|
||||
async fn get_sha(file: &NamedTempFile) -> Result<String, FileError> {
|
||||
let mut reader = BufReader::new(file.as_file());
|
||||
@@ -335,11 +324,9 @@ impl FileInfo {
|
||||
/// Guesses the MIME type based on the file extension.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `path` - The path to the file.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `String` - The guessed MIME type as a string.
|
||||
fn guess_mime_type(path: &Path) -> String {
|
||||
from_path(path)
|
||||
|
||||
@@ -7,6 +7,7 @@ use crate::redis::client::RedisClient;
|
||||
|
||||
use super::{file_info::FileInfo, ingress_object::IngressObject };
|
||||
|
||||
/// Struct defining the expected body when ingressing content.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct IngressInput {
|
||||
pub content: Option<String>,
|
||||
@@ -41,6 +42,13 @@ pub enum IngressContentError {
|
||||
}
|
||||
|
||||
/// Function to create ingress objects from input.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `input` - IngressInput containing information needed to ingress content.
|
||||
/// * `redis_client` - Initialized Redis client needed to retrieve file information.
|
||||
///
|
||||
/// # Returns
|
||||
/// * `Vec<IngressObject>` - An array containing the ingressed objects, one file/content type per object.
|
||||
pub async fn create_ingress_objects(
|
||||
input: IngressInput,
|
||||
redis_client: &RedisClient,
|
||||
@@ -48,6 +56,7 @@ pub async fn create_ingress_objects(
|
||||
// Initialize list
|
||||
let mut object_list = Vec::new();
|
||||
|
||||
// Create an IngressObject from input.content if it exists, checking for URL or text
|
||||
if let Some(input_content) = input.content {
|
||||
match Url::parse(&input_content) {
|
||||
Ok(url) => {
|
||||
@@ -69,6 +78,7 @@ pub async fn create_ingress_objects(
|
||||
}
|
||||
}
|
||||
|
||||
// Look up FileInfo objects using the redis db and the submitted uuids in input.files
|
||||
if let Some(file_uuids) = input.files {
|
||||
for uuid_str in file_uuids {
|
||||
let uuid = Uuid::parse_str(&uuid_str)?;
|
||||
@@ -88,6 +98,7 @@ pub async fn create_ingress_objects(
|
||||
}
|
||||
}
|
||||
|
||||
// If no objects are constructed, we return Err
|
||||
if object_list.is_empty() {
|
||||
return Err(IngressContentError::MimeDetection(
|
||||
"No valid content or files provided".into(),
|
||||
|
||||
@@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{ingress_content::IngressContentError, text_content::TextContent};
|
||||
|
||||
/// Knowledge object type, containing the content or reference to it, as well as metadata
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub enum IngressObject {
|
||||
Url {
|
||||
@@ -21,7 +22,15 @@ pub enum IngressObject {
|
||||
category: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl IngressObject {
|
||||
/// Creates a new `TextContent` instance from an `IngressObject`.
|
||||
///
|
||||
/// # Arguments
|
||||
/// `&self` - A reference to the `IngressObject`.
|
||||
///
|
||||
/// # Returns
|
||||
/// `Result<TextContent, IngressContentError>` - On success, an object containing a text representation of the object, could be a scraped URL, parsed PDF, etc.; otherwise an error.
|
||||
pub async fn to_text_content(&self) -> Result<TextContent, IngressContentError> {
|
||||
match self {
|
||||
IngressObject::Url { url, instructions, category } => {
|
||||
@@ -66,6 +75,11 @@ impl IngressObject {
|
||||
let content = tokio::fs::read_to_string(&file_info.path).await?;
|
||||
Ok(content)
|
||||
}
|
||||
"text/markdown" => {
|
||||
// Read the file and return its content
|
||||
let content = tokio::fs::read_to_string(&file_info.path).await?;
|
||||
Ok(content)
|
||||
}
|
||||
"application/pdf" => {
|
||||
// TODO: Implement PDF text extraction using a crate like `pdf-extract` or `lopdf`
|
||||
Err(IngressContentError::UnsupportedMime(file_info.mime_type.clone()))
|
||||
|
||||
+54
-19
@@ -70,26 +70,61 @@ impl TextContent {
|
||||
let client = async_openai::Client::new();
|
||||
|
||||
// Define the JSON Schema for the expected response
|
||||
let schema = json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"json_ld": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"@context": { "type": "string" },
|
||||
"@type": { "type": "string" },
|
||||
"name": { "type": "string" }
|
||||
// Define only the essential properties
|
||||
},
|
||||
"required": ["@context", "@type", "name"],
|
||||
"additionalProperties": false
|
||||
// let schema = json!({
|
||||
// "type": "object",
|
||||
// "properties": {
|
||||
// "json_ld": {
|
||||
// "type": "object",
|
||||
// "properties": {
|
||||
// "@context": { "type": "string" },
|
||||
// "@type": { "type": "string" },
|
||||
// "name": { "type": "string" }
|
||||
// // Define only the essential properties
|
||||
// },
|
||||
// "required": ["@context", "@type", "name"],
|
||||
// "additionalProperties": false,
|
||||
// },
|
||||
// "description": { "type": "string" },
|
||||
// "related_category": { "type": "string" },
|
||||
// "instructions": { "type": "string" }
|
||||
// },
|
||||
// "required": ["json_ld", "description", "related_category", "instructions"],
|
||||
// "additionalProperties": false
|
||||
// });
|
||||
let schema = json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"knowledge_sources": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {"type": "string"},
|
||||
"type": {"type": "string", "enum": ["Document", "Page", "TextSnippet"]},
|
||||
"title": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
"relationships": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {"type": "string", "enum": ["RelatedTo", "RelevantTo", "SimilarTo"]},
|
||||
"target": {"type": "string", "description": "ID of the related knowledge source"}
|
||||
},
|
||||
"required": ["type", "target"],
|
||||
"additionalProperties": false,
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": { "type": "string" },
|
||||
"related_category": { "type": "string" },
|
||||
"instructions": { "type": "string" }
|
||||
"required": ["id", "type", "title", "description", "relationships"],
|
||||
"additionalProperties": false,
|
||||
}
|
||||
},
|
||||
"required": ["json_ld", "description", "related_category", "instructions"],
|
||||
"additionalProperties": false
|
||||
"category": {"type": "string"},
|
||||
"instructions": {"type": "string"}
|
||||
},
|
||||
"required": ["knowledge_sources", "category", "instructions"],
|
||||
"additionalProperties": false
|
||||
});
|
||||
|
||||
let response_format = async_openai::types::ResponseFormat::JsonSchema {
|
||||
@@ -114,7 +149,7 @@ impl TextContent {
|
||||
// Build the chat completion request
|
||||
let request = CreateChatCompletionRequestArgs::default()
|
||||
.model("gpt-4o-mini")
|
||||
.max_tokens(1024u32)
|
||||
.max_tokens(2048u32)
|
||||
.messages([
|
||||
ChatCompletionRequestSystemMessage::from(system_message).into(),
|
||||
ChatCompletionRequestUserMessage::from(user_message).into(),
|
||||
|
||||
Reference in New Issue
Block a user