mirror of
https://github.com/perstarkse/minne.git
synced 2026-03-18 07:24:12 +01:00
feat: readability parsing, screenshot of page, file serving
This commit is contained in:
@@ -33,4 +33,6 @@ pub enum AppError {
|
||||
Tiktoken(#[from] anyhow::Error),
|
||||
#[error("Ingress Processing error: {0}")]
|
||||
Processing(String),
|
||||
#[error("DOM smoothie error: {0}")]
|
||||
DomSmoothie(#[from] dom_smoothie::ReadabilityError),
|
||||
}
|
||||
|
||||
@@ -38,7 +38,8 @@ stored_object!(FileInfo, "file", {
|
||||
sha256: String,
|
||||
path: String,
|
||||
file_name: String,
|
||||
mime_type: String
|
||||
mime_type: String,
|
||||
user_id: String
|
||||
});
|
||||
|
||||
impl FileInfo {
|
||||
@@ -83,6 +84,7 @@ impl FileInfo {
|
||||
.to_string_lossy()
|
||||
.into(),
|
||||
mime_type: Self::guess_mime_type(Path::new(&sanitized_file_name)),
|
||||
user_id: user_id.to_string(),
|
||||
};
|
||||
|
||||
// Store in database
|
||||
@@ -258,6 +260,22 @@ impl FileInfo {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Retrieves a `FileInfo` by its ID.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `id` - The ID string of the file.
|
||||
/// * `db_client` - Reference to the SurrealDbClient.
|
||||
///
|
||||
/// # Returns
|
||||
/// * `Result<FileInfo, FileError>` - The `FileInfo` or an error if not found or on DB issues.
|
||||
pub async fn get_by_id(id: &str, db_client: &SurrealDbClient) -> Result<FileInfo, FileError> {
|
||||
match db_client.get_item::<FileInfo>(id).await {
|
||||
Ok(Some(file_info)) => Ok(file_info),
|
||||
Ok(None) => Err(FileError::FileNotFound(id.to_string())),
|
||||
Err(e) => Err(FileError::SurrealError(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -460,6 +478,7 @@ mod tests {
|
||||
id: Uuid::new_v4().to_string(),
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
user_id: "user123".to_string(),
|
||||
sha256: "test_sha256_hash".to_string(),
|
||||
path: "/path/to/file.txt".to_string(),
|
||||
file_name: "manual_file.txt".to_string(),
|
||||
@@ -517,6 +536,7 @@ mod tests {
|
||||
// The file path should point to our test file
|
||||
let file_info = FileInfo {
|
||||
id: file_id.clone(),
|
||||
user_id: "user123".to_string(),
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
sha256: "test_sha256_hash".to_string(),
|
||||
@@ -586,4 +606,72 @@ mod tests {
|
||||
_ => panic!("Expected FileNotFound error"),
|
||||
}
|
||||
}
|
||||
#[tokio::test]
|
||||
async fn test_get_by_id() {
|
||||
// Setup in-memory database for testing
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
|
||||
// Create a FileInfo instance directly
|
||||
let now = Utc::now();
|
||||
let file_id = Uuid::new_v4().to_string();
|
||||
let original_file_info = FileInfo {
|
||||
id: file_id.clone(),
|
||||
user_id: "user123".to_string(),
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
sha256: "test_sha256_for_get_by_id".to_string(),
|
||||
path: "/path/to/get_by_id_test.txt".to_string(),
|
||||
file_name: "get_by_id_test.txt".to_string(),
|
||||
mime_type: "text/plain".to_string(),
|
||||
};
|
||||
|
||||
// Store it in the database
|
||||
db.store_item(original_file_info.clone())
|
||||
.await
|
||||
.expect("Failed to store item for get_by_id test");
|
||||
|
||||
// Retrieve it using get_by_id
|
||||
let result = FileInfo::get_by_id(&file_id, &db).await;
|
||||
|
||||
// Assert success and content match
|
||||
assert!(result.is_ok());
|
||||
let retrieved_info = result.unwrap();
|
||||
assert_eq!(retrieved_info.id, original_file_info.id);
|
||||
assert_eq!(retrieved_info.sha256, original_file_info.sha256);
|
||||
assert_eq!(retrieved_info.file_name, original_file_info.file_name);
|
||||
assert_eq!(retrieved_info.path, original_file_info.path);
|
||||
assert_eq!(retrieved_info.mime_type, original_file_info.mime_type);
|
||||
// Optionally compare timestamps if precision isn't an issue
|
||||
// assert_eq!(retrieved_info.created_at, original_file_info.created_at);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_by_id_not_found() {
|
||||
// Setup in-memory database for testing
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
|
||||
// Try to retrieve a non-existent ID
|
||||
let non_existent_id = "non-existent-file-id";
|
||||
let result = FileInfo::get_by_id(non_existent_id, &db).await;
|
||||
|
||||
// Assert failure
|
||||
assert!(result.is_err());
|
||||
|
||||
// Assert the specific error type is FileNotFound
|
||||
match result {
|
||||
Err(FileError::FileNotFound(id)) => {
|
||||
assert_eq!(id, non_existent_id);
|
||||
}
|
||||
Err(e) => panic!("Expected FileNotFound error, but got {:?}", e),
|
||||
Ok(_) => panic!("Expected an error, but got Ok"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -114,6 +114,7 @@ mod tests {
|
||||
id: mock.id,
|
||||
sha256: "mock-sha256".to_string(),
|
||||
path: "/mock/path".to_string(),
|
||||
user_id: "user123".to_string(),
|
||||
file_name: "mock.txt".to_string(),
|
||||
mime_type: "text/plain".to_string(),
|
||||
created_at: Utc::now(),
|
||||
|
||||
@@ -31,19 +31,7 @@ impl SystemSettings {
|
||||
let settings: Option<Self> = db.get_item("current").await?;
|
||||
|
||||
if settings.is_none() {
|
||||
let created_settings = SystemSettings {
|
||||
id: "current".to_string(),
|
||||
registrations_enabled: true,
|
||||
require_email_verification: false,
|
||||
query_model: "gpt-4o-mini".to_string(),
|
||||
processing_model: "gpt-4o-mini".to_string(),
|
||||
query_system_prompt:
|
||||
crate::storage::types::system_prompts::DEFAULT_QUERY_SYSTEM_PROMPT.to_string(),
|
||||
ingestion_system_prompt:
|
||||
crate::storage::types::system_prompts::DEFAULT_INGRESS_ANALYSIS_SYSTEM_PROMPT
|
||||
.to_string(),
|
||||
};
|
||||
|
||||
let created_settings = Self::new();
|
||||
let stored: Option<Self> = db.store_item(created_settings).await?;
|
||||
return stored.ok_or(AppError::Validation("Failed to initialize settings".into()));
|
||||
}
|
||||
|
||||
@@ -5,10 +5,17 @@ use crate::{error::AppError, storage::db::SurrealDbClient, stored_object};
|
||||
|
||||
use super::file_info::FileInfo;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
|
||||
pub struct UrlInfo {
|
||||
pub url: String,
|
||||
pub title: String,
|
||||
pub image_id: String,
|
||||
}
|
||||
|
||||
stored_object!(TextContent, "text_content", {
|
||||
text: String,
|
||||
file_info: Option<FileInfo>,
|
||||
url: Option<String>,
|
||||
url_info: Option<UrlInfo>,
|
||||
instructions: String,
|
||||
category: String,
|
||||
user_id: String
|
||||
@@ -20,7 +27,7 @@ impl TextContent {
|
||||
instructions: String,
|
||||
category: String,
|
||||
file_info: Option<FileInfo>,
|
||||
url: Option<String>,
|
||||
url_info: Option<UrlInfo>,
|
||||
user_id: String,
|
||||
) -> Self {
|
||||
let now = Utc::now();
|
||||
@@ -30,7 +37,7 @@ impl TextContent {
|
||||
updated_at: now,
|
||||
text,
|
||||
file_info,
|
||||
url,
|
||||
url_info,
|
||||
instructions,
|
||||
category,
|
||||
user_id,
|
||||
@@ -85,7 +92,7 @@ mod tests {
|
||||
assert_eq!(text_content.category, category);
|
||||
assert_eq!(text_content.user_id, user_id);
|
||||
assert!(text_content.file_info.is_none());
|
||||
assert!(text_content.url.is_none());
|
||||
assert!(text_content.url_info.is_none());
|
||||
assert!(!text_content.id.is_empty());
|
||||
}
|
||||
|
||||
@@ -96,19 +103,27 @@ mod tests {
|
||||
let instructions = "URL instructions".to_string();
|
||||
let category = "URL category".to_string();
|
||||
let user_id = "user123".to_string();
|
||||
let url = Some("https://example.com/document.pdf".to_string());
|
||||
let title = "page_title".to_string();
|
||||
let image_id = "image12312".to_string();
|
||||
let url = "https://example.com/document.pdf".to_string();
|
||||
|
||||
let url_info = Some(UrlInfo {
|
||||
url,
|
||||
title,
|
||||
image_id,
|
||||
});
|
||||
|
||||
let text_content = TextContent::new(
|
||||
text.clone(),
|
||||
instructions.clone(),
|
||||
category.clone(),
|
||||
None,
|
||||
url.clone(),
|
||||
url_info.clone(),
|
||||
user_id.clone(),
|
||||
);
|
||||
|
||||
// Check URL field is set
|
||||
assert_eq!(text_content.url, url);
|
||||
assert_eq!(text_content.url_info, url_info);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
Reference in New Issue
Block a user