diff --git a/src/ingress/types/ingress_input.rs b/src/ingress/types/ingress_input.rs index 30fd248..3a32d2e 100644 --- a/src/ingress/types/ingress_input.rs +++ b/src/ingress/types/ingress_input.rs @@ -1,10 +1,12 @@ use super::ingress_object::IngressObject; -use crate::storage::{db::SurrealDbClient, types::file_info::FileInfo}; +use crate::storage::{ + db::{get_item, SurrealDbClient}, + types::file_info::FileInfo, +}; use serde::{Deserialize, Serialize}; use thiserror::Error; use tracing::info; use url::Url; -use uuid::Uuid; /// Struct defining the expected body when ingressing content. #[derive(Serialize, Deserialize, Debug)] @@ -24,6 +26,9 @@ pub enum IngressContentError { #[error("UTF-8 conversion error: {0}")] Utf8(#[from] std::string::FromUtf8Error), + #[error("SurrealDb error: {0}")] + SurrealDbError(#[from] surrealdb::Error), + #[error("MIME type detection failed for input: {0}")] MimeDetection(String), @@ -79,19 +84,15 @@ pub async fn create_ingress_objects( // Look up FileInfo objects using the db and the submitted uuids in input.files if let Some(file_uuids) = input.files { - for uuid_str in file_uuids { - let uuid = Uuid::parse_str(&uuid_str)?; - match FileInfo::get_by_uuid(uuid, db_client).await { - Ok(file_info) => { - object_list.push(IngressObject::File { - file_info, - instructions: input.instructions.clone(), - category: input.category.clone(), - }); - } - _ => { - info!("No file with UUID: {}", uuid); - } + for uuid in file_uuids { + if let Some(file_info) = get_item::(&db_client, &uuid).await? { + object_list.push(IngressObject::File { + file_info, + instructions: input.instructions.clone(), + category: input.category.clone(), + }); + } else { + info!("No file with UUID: {}", uuid); } } } diff --git a/src/storage/db.rs b/src/storage/db.rs index ecd782e..911059a 100644 --- a/src/storage/db.rs +++ b/src/storage/db.rs @@ -98,3 +98,18 @@ where { db_client.select(T::table_name()).await } + +/// Operation to retrieve a single object by its ID, requires the struct to implement StoredObject +/// +/// # Arguments +/// * `db_client` - An initialized database client +/// * `id` - The ID of the item to retrieve +/// +/// # Returns +/// * `Result, Error>` - The found item or Error +pub async fn get_item(db_client: &Surreal, id: &str) -> Result, Error> +where + T: for<'de> StoredObject, +{ + Ok(db_client.select((T::table_name(), id)).await?) +} diff --git a/src/storage/types/file_info.rs b/src/storage/types/file_info.rs index 014a3ba..7596f9d 100644 --- a/src/storage/types/file_info.rs +++ b/src/storage/types/file_info.rs @@ -1,282 +1,142 @@ -use axum::{ - http::StatusCode, - response::{IntoResponse, Response}, - Json, -}; use axum_typed_multipart::FieldData; use mime_guess::from_path; -use serde::{Deserialize, Serialize}; -use serde_json::json; use sha2::{Digest, Sha256}; use std::{ io::{BufReader, Read}, path::{Path, PathBuf}, }; -use surrealdb::RecordId; use tempfile::NamedTempFile; use thiserror::Error; -use tracing::{debug, info}; +use tracing::info; use uuid::Uuid; -use crate::storage::db::SurrealDbClient; +use crate::{ + storage::db::{store_item, SurrealDbClient}, + stored_object, +}; -#[derive(Debug, Deserialize)] -struct Record { - #[allow(dead_code)] - id: RecordId, -} - -/// Represents metadata and storage information for a file. -#[derive(Debug, PartialEq, Clone, Deserialize, Serialize)] -pub struct FileInfo { - pub uuid: String, - pub sha256: String, - pub path: String, - pub mime_type: String, -} - -/// Errors that can occur during FileInfo operations #[derive(Error, Debug)] pub enum FileError { - #[error("IO error occurred: {0}")] - Io(#[from] std::io::Error), - - #[error("UTF-8 conversion error: {0}")] - Utf8(#[from] std::string::FromUtf8Error), - - #[error("MIME type detection failed for input: {0}")] - MimeDetection(String), - - #[error("Unsupported MIME type: {0}")] - UnsupportedMime(String), - - #[error("SurrealDB error: {0}")] - SurrealError(#[from] surrealdb::Error), - #[error("File not found for UUID: {0}")] FileNotFound(String), + #[error("IO error occurred: {0}")] + Io(#[from] std::io::Error), + #[error("Duplicate file detected with SHA256: {0}")] DuplicateFile(String), - #[error("Hash collision detected")] - HashCollision, + #[error("SurrealDB error: {0}")] + SurrealError(#[from] surrealdb::Error), - #[error("Invalid UUID format: {0}")] - InvalidUuid(String), + #[error("Failed to persist file: {0}")] + PersistError(#[from] tempfile::PersistError), #[error("File name missing in metadata")] MissingFileName, - - #[error("Failed to persist file: {0}")] - PersistError(String), - - #[error("Serialization error: {0}")] - SerializationError(String), - - #[error("Deserialization error: {0}")] - DeserializationError(String), - // Add more error variants as needed. } -impl IntoResponse for FileError { - fn into_response(self) -> Response { - let (status, error_message) = match self { - FileError::Io(_) => (StatusCode::INTERNAL_SERVER_ERROR, "Internal server error"), - FileError::Utf8(_) => (StatusCode::BAD_REQUEST, "Invalid UTF-8 data"), - FileError::MimeDetection(_) => (StatusCode::BAD_REQUEST, "MIME type detection failed"), - FileError::UnsupportedMime(_) => { - (StatusCode::UNSUPPORTED_MEDIA_TYPE, "Unsupported MIME type") - } - FileError::FileNotFound(_) => (StatusCode::NOT_FOUND, "File not found"), - FileError::DuplicateFile(_) => (StatusCode::CONFLICT, "Duplicate file detected"), - FileError::HashCollision => { - (StatusCode::INTERNAL_SERVER_ERROR, "Hash collision detected") - } - FileError::InvalidUuid(_) => (StatusCode::BAD_REQUEST, "Invalid UUID format"), - FileError::MissingFileName => { - (StatusCode::BAD_REQUEST, "Missing file name in metadata") - } - FileError::PersistError(_) => { - (StatusCode::INTERNAL_SERVER_ERROR, "Failed to persist file") - } - FileError::SerializationError(_) => { - (StatusCode::INTERNAL_SERVER_ERROR, "Serialization error") - } - FileError::DeserializationError(_) => { - (StatusCode::BAD_REQUEST, "Deserialization error") - } - FileError::SurrealError(_) => { - (StatusCode::INTERNAL_SERVER_ERROR, "Serialization error") - } - }; - - let body = Json(json!({ - "error": error_message, - })); - - (status, body).into_response() - } -} +stored_object!(FileInfo, "file", { + sha256: String, + path: String, + mime_type: String +}); impl FileInfo { pub async fn new( field_data: FieldData, db_client: &SurrealDbClient, - ) -> Result { - let file = field_data.contents; // NamedTempFile - let metadata = field_data.metadata; + ) -> Result { + let file = field_data.contents; + let file_name = field_data + .metadata + .file_name + .ok_or(FileError::MissingFileName)?; - // Extract file name from metadata - let file_name = metadata.file_name.ok_or(FileError::MissingFileName)?; - info!("File name: {:?}", file_name); + // Calculate SHA256 + let sha256 = Self::get_sha(&file).await?; - // Calculate SHA256 hash of the file - let sha = Self::get_sha(&file).await?; - info!("SHA256: {:?}", sha); - - // Check if SHA exists in SurrealDB - if let Ok(file) = Self::get_by_sha(&sha, db_client).await { - info!("File already exists in database with SHA256: {}", sha); - // SHA exists: return FileInfo - return Ok(file); + // Early return if file already exists + match Self::get_by_sha(&sha256, db_client).await { + Ok(existing_file) => { + info!("File already exists with SHA256: {}", sha256); + return Ok(existing_file); + } + Err(FileError::FileNotFound(_)) => (), // Expected case for new files + Err(e) => return Err(e), // Propagate unexpected errors } - // Generate a new UUID + // Generate UUID and prepare paths let uuid = Uuid::new_v4(); - info!("UUID: {:?}", uuid); + let sanitized_file_name = Self::sanitize_file_name(&file_name); - // Sanitize file name - let sanitized_file_name = sanitize_file_name(&file_name); - info!("Sanitized file name: {:?}", sanitized_file_name); - - // Persist the file to the filesystem - let persisted_path = Self::persist_file(&uuid, file, &sanitized_file_name).await?; - - // Guess the MIME type - let mime_type = Self::guess_mime_type(&persisted_path); - info!("Mime type: {:?}", mime_type); - - // Construct the FileInfo object - let file_info = FileInfo { - uuid: uuid.to_string(), - sha256: sha.clone(), - path: persisted_path.to_string_lossy().to_string(), - mime_type, + // Create new FileInfo instance + let file_info = Self { + id: uuid.to_string(), + sha256, + path: Self::persist_file(&uuid, file, &sanitized_file_name) + .await? + .to_string_lossy() + .into(), + mime_type: Self::guess_mime_type(Path::new(&sanitized_file_name)), }; - // Store FileInfo in SurrealDB - Self::create_record(&file_info, db_client).await?; + // Store in database + store_item(&db_client.client, file_info.clone()).await?; Ok(file_info) } - /// Updates an existing file identified by UUID with new file data. + /// Guesses the MIME type based on the file extension. /// /// # Arguments - /// * `uuid` - The UUID of the file to update. - /// * `new_field_data` - The new file data. - /// * `redis_client` - Reference to the RedisClient. + /// * `path` - The path to the file. /// /// # Returns - /// * `Result` - The updated `FileInfo` or an error. - pub async fn update( - uuid: Uuid, - new_field_data: FieldData, - db_client: &SurrealDbClient, - ) -> Result { - let new_file = new_field_data.contents; - let new_metadata = new_field_data.metadata; - - // Extract new file name - let new_file_name = new_metadata.file_name.ok_or(FileError::MissingFileName)?; - - // Calculate SHA256 of the new file - let new_sha = Self::get_sha(&new_file).await?; - - // Check if the new SHA already exists - if let Ok(file) = Self::get_by_sha(&new_sha, db_client).await { - info!("File already exists in database with SHA256: {}", new_sha); - // SHA exists: return FileInfo - return Ok(file); - } - - // Sanitize new file name - let sanitized_new_file_name = sanitize_file_name(&new_file_name); - - // Persist the new file - let new_persisted_path = - Self::persist_file(&uuid, new_file, &sanitized_new_file_name).await?; - - // Guess the new MIME type - let new_mime_type = Self::guess_mime_type(&new_persisted_path); - - // Get the existing item and remove it - let old_record = Self::get_by_uuid(uuid, db_client).await?; - Self::delete_record(&old_record.sha256, db_client).await?; - - // Update FileInfo - let updated_file_info = FileInfo { - uuid: uuid.to_string(), - sha256: new_sha.clone(), - path: new_persisted_path.to_string_lossy().to_string(), - mime_type: new_mime_type, - }; - - // Save the new item - Self::create_record(&updated_file_info, db_client).await?; - - // Optionally, delete the old file from the filesystem if it's no longer referenced - // This requires reference counting or checking if other FileInfo entries point to the same SHA - // For simplicity, this step is omitted. - - Ok(updated_file_info) + /// * `String` - The guessed MIME type as a string. + fn guess_mime_type(path: &Path) -> String { + from_path(path) + .first_or(mime::APPLICATION_OCTET_STREAM) + .to_string() } - /// Deletes a file and its corresponding metadata based on UUID. + /// Calculates the SHA256 hash of the given file. /// /// # Arguments - /// * `uuid` - The UUID of the file to delete. - /// * `redis_client` - Reference to the RedisClient. + /// * `file` - The file to hash. /// /// # Returns - /// * `Result<(), FileError>` - Empty result or an error. - pub async fn delete(uuid: Uuid, db_client: &SurrealDbClient) -> Result<(), FileError> { - // Retrieve FileInfo to get SHA256 and path - let file_info = Self::get_by_uuid(uuid, db_client).await?; + /// * `Result` - The SHA256 hash as a hex string or an error. + async fn get_sha(file: &NamedTempFile) -> Result { + let mut reader = BufReader::new(file.as_file()); + let mut hasher = Sha256::new(); + let mut buffer = [0u8; 8192]; // 8KB buffer - // Delete the file from the filesystem - let file_path = Path::new(&file_info.path); - if file_path.exists() { - tokio::fs::remove_file(file_path) - .await - .map_err(FileError::Io)?; - info!("Deleted file at path: {}", file_info.path); - } else { - info!( - "File path does not exist, skipping deletion: {}", - file_info.path - ); - } - - // Delete the FileInfo from database - Self::delete_record(&file_info.sha256, db_client).await?; - - // Remove the UUID directory if empty - let uuid_dir = file_path - .parent() - .ok_or(FileError::FileNotFound(uuid.to_string()))?; - if uuid_dir.exists() { - let mut entries = tokio::fs::read_dir(uuid_dir).await.map_err(FileError::Io)?; - if entries.next_entry().await?.is_none() { - tokio::fs::remove_dir(uuid_dir) - .await - .map_err(FileError::Io)?; - info!("Deleted empty UUID directory: {:?}", uuid_dir); + loop { + let n = reader.read(&mut buffer)?; + if n == 0 { + break; } + hasher.update(&buffer[..n]); } - Ok(()) + let digest = hasher.finalize(); + Ok(format!("{:x}", digest)) + } + + /// Sanitizes the file name to prevent security vulnerabilities like directory traversal. + /// Replaces any non-alphanumeric characters (excluding '.' and '_') with underscores. + fn sanitize_file_name(file_name: &str) -> String { + file_name + .chars() + .map(|c| { + if c.is_ascii_alphanumeric() || c == '.' || c == '_' { + c + } else { + '_' + } + }) + .collect() } /// Persists the file to the filesystem under `./data/{uuid}/{file_name}`. @@ -306,99 +166,13 @@ impl FileInfo { info!("Final path: {:?}", final_path); // Persist the temporary file to the final path - file.persist(&final_path) - .map_err(|e| FileError::PersistError(e.to_string()))?; + file.persist(&final_path)?; info!("Persisted file to {:?}", final_path); Ok(final_path) } - /// Calculates the SHA256 hash of the given file. - /// - /// # Arguments - /// * `file` - The file to hash. - /// - /// # Returns - /// * `Result` - The SHA256 hash as a hex string or an error. - async fn get_sha(file: &NamedTempFile) -> Result { - let mut reader = BufReader::new(file.as_file()); - let mut hasher = Sha256::new(); - let mut buffer = [0u8; 8192]; // 8KB buffer - - loop { - let n = reader.read(&mut buffer)?; - if n == 0 { - break; - } - hasher.update(&buffer[..n]); - } - - let digest = hasher.finalize(); - Ok(format!("{:x}", digest)) - } - - /// Guesses the MIME type based on the file extension. - /// - /// # Arguments - /// * `path` - The path to the file. - /// - /// # Returns - /// * `String` - The guessed MIME type as a string. - fn guess_mime_type(path: &Path) -> String { - from_path(path) - .first_or(mime::APPLICATION_OCTET_STREAM) - .to_string() - } - - /// Creates a new record in SurrealDB for the given `FileInfo`. - /// - /// # Arguments - /// * `file_info` - The `FileInfo` to store. - /// * `db_client` - Reference to the SurrealDbClient. - /// - /// # Returns - /// * `Result<(), FileError>` - Empty result or an error. - - async fn create_record( - file_info: &FileInfo, - db_client: &SurrealDbClient, - ) -> Result<(), FileError> { - // Create the record - let _created: Option = db_client - .client - .create(("file", &file_info.uuid)) - .content(file_info.clone()) - .await?; - - debug!("{:?}", _created); - - info!("Created FileInfo record with SHA256: {}", file_info.sha256); - - Ok(()) - } - - /// Retrieves a `FileInfo` by UUID. - /// - /// # Arguments - /// * `uuid` - The UUID string. - /// * `db_client` - Reference to the SurrealDbClient. - /// - /// # Returns - /// * `Result` - The `FileInfo` or `Error` if not found. - pub async fn get_by_uuid( - uuid: Uuid, - db_client: &SurrealDbClient, - ) -> Result { - let query = format!("SELECT * FROM file WHERE uuid = '{}'", uuid); - let response: Vec = db_client.client.query(query).await?.take(0)?; - - response - .into_iter() - .next() - .ok_or(FileError::FileNotFound(uuid.to_string())) - } - /// Retrieves a `FileInfo` by SHA256. /// /// # Arguments @@ -411,45 +185,9 @@ impl FileInfo { let query = format!("SELECT * FROM file WHERE sha256 = '{}'", &sha256); let response: Vec = db_client.client.query(query).await?.take(0)?; - debug!("{:?}", response); - response .into_iter() .next() .ok_or(FileError::FileNotFound(sha256.to_string())) } - - /// Deletes a `FileInfo` record by SHA256. - /// - /// # Arguments - /// * `sha256` - The SHA256 hash string. - /// * `db_client` - Reference to the SurrealDbClient. - /// - /// # Returns - /// * `Result<(), FileError>` - Empty result or an error. - async fn delete_record(sha256: &str, db_client: &SurrealDbClient) -> Result<(), FileError> { - let table = "file"; - let primary_key = sha256; - - let _created: Option = db_client.client.delete((table, primary_key)).await?; - - info!("Deleted FileInfo record with SHA256: {}", sha256); - - Ok(()) - } -} - -/// Sanitizes the file name to prevent security vulnerabilities like directory traversal. -/// Replaces any non-alphanumeric characters (excluding '.' and '_') with underscores. -fn sanitize_file_name(file_name: &str) -> String { - file_name - .chars() - .map(|c| { - if c.is_ascii_alphanumeric() || c == '.' || c == '_' { - c - } else { - '_' - } - }) - .collect() }