Mirror of https://github.com/perstarkse/minne.git (synced 2026-01-11 12:40:24 +01:00)

Compare commits (10 commits)
| SHA1 |
|---|
| a2c9bb848d |
| 04ee225732 |
| 13b7ad6f3a |
| 112a6965a4 |
| 911e830be5 |
| 3196e65172 |
| f13791cfcf |
| 75c200b2ba |
| 1b7c24747a |
| 241ad9a089 |
```diff
@@ -1,8 +1,13 @@
 # Changelog
 
+## Unreleased
+
+## Version 0.2.7 (2025-12-04)
+
+- Improved admin page: models are now loaded only when specifically requested. Groundwork for upcoming configuration features.
+- Fix: timezone-aware info in scratchpad
 
 ## Version 0.2.6 (2025-10-29)
 
 - Added an opt-in FastEmbed-based reranking stage behind `reranking_enabled`. It improves retrieval accuracy by re-scoring hybrid results.
 - Fix: default name for relationships harmonized across the application
 
 ## Version 0.2.5 (2025-10-24)
 
 - Added manual knowledge entity creation flows using a modal, with the option for suggested relationships
```
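For the 0.2.6 reranking entry, a hedged sketch of what opting in might look like; it assumes `reranking_enabled` is a boolean field on `AppConfig`, which this compare view does not show.

```rust
use common::utils::config::AppConfig; // assumed import path

// Hypothetical sketch only: the compare view does not show where the flag lives.
// Assumption: `reranking_enabled` is a boolean field on `AppConfig`.
fn config_with_reranking() -> AppConfig {
    AppConfig {
        reranking_enabled: true, // opt in to FastEmbed re-scoring of hybrid results
        ..Default::default()
    }
}
```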
Cargo.lock (generated): 2 changes
```
@@ -3248,11 +3248,13 @@ dependencies = [
name = "ingestion-pipeline"
version = "0.1.0"
dependencies = [
 "anyhow",
 "async-openai",
 "async-trait",
 "axum",
 "axum_typed_multipart",
 "base64 0.22.1",
 "bytes",
 "chrono",
 "common",
 "composite-retrieval",
```
```diff
@@ -1,15 +1,22 @@
 use std::sync::Arc;
 
-use common::{storage::db::SurrealDbClient, utils::config::AppConfig};
+use common::{
+    storage::{db::SurrealDbClient, store::StorageManager},
+    utils::config::AppConfig,
+};
 
 #[derive(Clone)]
 pub struct ApiState {
     pub db: Arc<SurrealDbClient>,
     pub config: AppConfig,
+    pub storage: StorageManager,
 }
 
 impl ApiState {
-    pub async fn new(config: &AppConfig) -> Result<Self, Box<dyn std::error::Error>> {
+    pub async fn new(
+        config: &AppConfig,
+        storage: StorageManager,
+    ) -> Result<Self, Box<dyn std::error::Error>> {
         let surreal_db_client = Arc::new(
             SurrealDbClient::new(
                 &config.surrealdb_address,
@@ -26,6 +33,7 @@ impl ApiState {
         let app_state = Self {
             db: surreal_db_client.clone(),
             config: config.clone(),
+            storage,
         };
 
         Ok(app_state)
```
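A minimal wiring sketch of the new signature; the startup-function shape is an assumption, only `StorageManager::new` and the new `ApiState::new` signature come from this diff.

```rust
use common::{storage::store::StorageManager, utils::config::AppConfig};

// Hypothetical startup sketch: build the shared StorageManager once and hand
// it to ApiState::new instead of re-deriving paths from config in each caller.
async fn build_state(config: &AppConfig) -> Result<ApiState, Box<dyn std::error::Error>> {
    // The manager selects its backend (Local or Memory) from config.storage
    // and is cheap to pass around because it derives Clone.
    let storage = StorageManager::new(config).await?;
    ApiState::new(config, storage).await
}
```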
```diff
@@ -32,7 +32,8 @@ pub async fn ingest_data(
     info!("Received input: {:?}", input);
 
     let file_infos = try_join_all(input.files.into_iter().map(|file| {
-        FileInfo::new(file, &state.db, &user.id, &state.config).map_err(AppError::from)
+        FileInfo::new_with_storage(file, &state.db, &user.id, &state.storage)
+            .map_err(AppError::from)
     }))
     .await?;
```
```diff
@@ -1,4 +1,5 @@
-use std::path::{Path, PathBuf};
+use std::io::ErrorKind;
+use std::path::{Component, Path, PathBuf};
 use std::sync::Arc;
 
 use anyhow::{anyhow, Result as AnyResult};
@@ -6,36 +7,421 @@ use bytes::Bytes;
 use futures::stream::BoxStream;
 use futures::{StreamExt, TryStreamExt};
 use object_store::local::LocalFileSystem;
 use object_store::memory::InMemory;
 use object_store::{path::Path as ObjPath, ObjectStore};
 
 use crate::utils::config::{AppConfig, StorageKind};
 
 pub type DynStore = Arc<dyn ObjectStore>;
 
-/// Build an object store instance anchored at the given filesystem `prefix`.
+/// Storage manager with persistent state and proper lifecycle management.
+#[derive(Clone)]
+pub struct StorageManager {
+    store: DynStore,
+    backend_kind: StorageKind,
+    local_base: Option<PathBuf>,
+}
+
+impl StorageManager {
+    /// Create a new StorageManager with the specified configuration.
+    ///
+    /// This method validates the configuration and creates the appropriate
+    /// storage backend with proper initialization.
+    pub async fn new(cfg: &AppConfig) -> object_store::Result<Self> {
+        let backend_kind = cfg.storage.clone();
+        let (store, local_base) = create_storage_backend(cfg).await?;
+
+        Ok(Self {
+            store,
+            backend_kind,
+            local_base,
+        })
+    }
+
+    /// Create a StorageManager with a custom storage backend.
+    ///
+    /// This method is useful for testing scenarios where you want to inject
+    /// a specific storage backend.
+    pub fn with_backend(store: DynStore, backend_kind: StorageKind) -> Self {
+        Self {
+            store,
+            backend_kind,
+            local_base: None,
+        }
+    }
+
+    /// Get the storage backend kind.
+    pub fn backend_kind(&self) -> &StorageKind {
+        &self.backend_kind
+    }
+
+    /// Access the resolved local base directory when using the local backend.
+    pub fn local_base_path(&self) -> Option<&Path> {
+        self.local_base.as_deref()
+    }
+
+    /// Resolve an object location to a filesystem path when using the local backend.
+    ///
+    /// Returns `None` when the backend is not local or when the provided location includes
+    /// unsupported components (absolute paths or parent traversals).
+    pub fn resolve_local_path(&self, location: &str) -> Option<PathBuf> {
+        let base = self.local_base_path()?;
+        let relative = Path::new(location);
+        if relative.is_absolute()
+            || relative
+                .components()
+                .any(|component| matches!(component, Component::ParentDir | Component::Prefix(_)))
+        {
+            return None;
+        }
+
+        Some(base.join(relative))
+    }
```
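Since `resolve_local_path` is the traversal guard for the local backend, here is a small hedged sketch of its contract; the assertions assume a manager built with `StorageKind::Local` (otherwise every call returns `None`).

```rust
// Sketch of the guard's behavior as documented above (locations are invented).
fn traversal_guard_examples(storage: &StorageManager) {
    // A plain relative location resolves under the local base directory.
    assert!(storage.resolve_local_path("user/uuid/file.txt").is_some());
    // Parent traversals and absolute paths are rejected with None.
    assert!(storage.resolve_local_path("../etc/passwd").is_none());
    assert!(storage.resolve_local_path("/etc/passwd").is_none());
}
```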
```diff
+    /// Store bytes at the specified location.
+    ///
+    /// This operation persists data using the underlying storage backend.
+    /// For memory backends, data persists for the lifetime of the StorageManager.
+    pub async fn put(&self, location: &str, data: Bytes) -> object_store::Result<()> {
+        let path = ObjPath::from(location);
+        let payload = object_store::PutPayload::from_bytes(data);
+        self.store.put(&path, payload).await.map(|_| ())
+    }
+
+    /// Retrieve bytes from the specified location.
+    ///
+    /// Returns the full contents buffered in memory.
+    pub async fn get(&self, location: &str) -> object_store::Result<Bytes> {
+        let path = ObjPath::from(location);
+        let result = self.store.get(&path).await?;
+        result.bytes().await
+    }
+
+    /// Get a streaming handle for large objects.
+    ///
+    /// Returns a fallible stream of Bytes chunks suitable for large file processing.
+    pub async fn get_stream(
+        &self,
+        location: &str,
+    ) -> object_store::Result<BoxStream<'static, object_store::Result<Bytes>>> {
+        let path = ObjPath::from(location);
+        let result = self.store.get(&path).await?;
+        Ok(result.into_stream())
+    }
+
+    /// Delete all objects below the specified prefix.
+    ///
+    /// For local filesystem backends, this also attempts to clean up empty directories.
+    pub async fn delete_prefix(&self, prefix: &str) -> object_store::Result<()> {
+        let prefix_path = ObjPath::from(prefix);
+        let locations = self
+            .store
+            .list(Some(&prefix_path))
+            .map_ok(|m| m.location)
+            .boxed();
+        self.store
+            .delete_stream(locations)
+            .try_collect::<Vec<_>>()
+            .await?;
+
+        // Cleanup filesystem directories only for local backend
+        if matches!(self.backend_kind, StorageKind::Local) {
+            self.cleanup_filesystem_directories(prefix).await?;
+        }
+
+        Ok(())
+    }
+
+    /// List all objects below the specified prefix.
+    pub async fn list(
+        &self,
+        prefix: Option<&str>,
+    ) -> object_store::Result<Vec<object_store::ObjectMeta>> {
+        let prefix_path = prefix.map(ObjPath::from);
+        self.store.list(prefix_path.as_ref()).try_collect().await
+    }
+
+    /// Check if an object exists at the specified location.
+    pub async fn exists(&self, location: &str) -> object_store::Result<bool> {
+        let path = ObjPath::from(location);
+        self.store
+            .head(&path)
+            .await
+            .map(|_| true)
+            .or_else(|e| match e {
+                object_store::Error::NotFound { .. } => Ok(false),
+                _ => Err(e),
+            })
+    }
+
+    /// Cleanup filesystem directories for local backend.
+    ///
+    /// This is a best-effort cleanup and ignores errors.
+    async fn cleanup_filesystem_directories(&self, prefix: &str) -> object_store::Result<()> {
+        if !matches!(self.backend_kind, StorageKind::Local) {
+            return Ok(());
+        }
+
+        let Some(base) = &self.local_base else {
+            return Ok(());
+        };
+
+        let relative = Path::new(prefix);
+        if relative.is_absolute()
+            || relative
+                .components()
+                .any(|component| matches!(component, Component::ParentDir | Component::Prefix(_)))
+        {
+            tracing::warn!(
+                prefix = %prefix,
+                "Skipping directory cleanup for unsupported prefix components"
+            );
+            return Ok(());
+        }
+
+        let mut current = base.join(relative);
+
+        while current.starts_with(base) && current.as_path() != base.as_path() {
+            match tokio::fs::remove_dir(&current).await {
+                Ok(_) => {}
+                Err(err) => match err.kind() {
+                    ErrorKind::NotFound => {}
+                    ErrorKind::DirectoryNotEmpty => break,
+                    _ => tracing::debug!(
+                        error = %err,
+                        path = %current.display(),
+                        "Failed to remove directory during cleanup"
+                    ),
+                },
+            }
+
+            if let Some(parent) = current.parent() {
+                current = parent.to_path_buf();
+            } else {
+                break;
+            }
+        }
+
+        Ok(())
+    }
+}
```
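Taken together, the new surface replaces ad-hoc free functions with methods on one injected value. A minimal round-trip sketch, assuming a `StorageManager` built from config as above; the location strings are invented for illustration.

```rust
use bytes::Bytes;

// put / exists / get / delete_prefix round trip against whichever backend
// the configuration selects.
async fn round_trip(storage: &StorageManager) -> object_store::Result<()> {
    let location = "user-1/doc-42/notes.txt";
    storage.put(location, Bytes::from_static(b"draft")).await?;
    assert!(storage.exists(location).await?);
    assert_eq!(storage.get(location).await?.as_ref(), b"draft");
    // Removes every object below the prefix; on the local backend it also
    // best-effort removes the emptied directories.
    storage.delete_prefix("user-1/doc-42/").await
}
```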
```diff
+/// Create a storage backend based on configuration.
+///
-/// - For the `Local` backend, `prefix` is the absolute directory on disk that
-///   serves as the root for all object paths passed to the store.
-/// - `prefix` must already exist; this function will create it if missing.
-///
-/// Example (Local):
-/// - prefix: `/var/data`
-/// - object location: `user/uuid/file.txt`
-/// - absolute path: `/var/data/user/uuid/file.txt`
-pub async fn build_store(prefix: &Path, cfg: &AppConfig) -> object_store::Result<DynStore> {
+/// This factory function handles the creation and initialization of different
+/// storage backends with proper error handling and validation.
+async fn create_storage_backend(
+    cfg: &AppConfig,
+) -> object_store::Result<(DynStore, Option<PathBuf>)> {
     match cfg.storage {
         StorageKind::Local => {
-            if !prefix.exists() {
-                tokio::fs::create_dir_all(prefix).await.map_err(|e| {
+            let base = resolve_base_dir(cfg);
+            if !base.exists() {
+                tokio::fs::create_dir_all(&base).await.map_err(|e| {
                     object_store::Error::Generic {
                         store: "LocalFileSystem",
                         source: e.into(),
                     }
                 })?;
             }
-            let store = LocalFileSystem::new_with_prefix(prefix)?;
-            Ok(Arc::new(store))
+            let store = LocalFileSystem::new_with_prefix(base.clone())?;
+            Ok((Arc::new(store), Some(base)))
         }
         StorageKind::Memory => {
             let store = InMemory::new();
-            Ok(Arc::new(store))
+            Ok((Arc::new(store), None))
         }
     }
 }
+
+/// Testing utilities for storage operations.
+///
+/// This module provides specialized utilities for testing scenarios with
+/// automatic memory backend setup and proper test isolation.
+#[cfg(test)]
+pub mod testing {
+    use super::*;
+    use crate::utils::config::{AppConfig, PdfIngestMode};
+    use uuid;
+
+    /// Create a test configuration with memory storage.
+    ///
+    /// This provides a ready-to-use configuration for testing scenarios
+    /// that don't require filesystem persistence.
+    pub fn test_config_memory() -> AppConfig {
+        AppConfig {
+            openai_api_key: "test".into(),
+            surrealdb_address: "test".into(),
+            surrealdb_username: "test".into(),
+            surrealdb_password: "test".into(),
+            surrealdb_namespace: "test".into(),
+            surrealdb_database: "test".into(),
+            data_dir: "/tmp/unused".into(), // Ignored for memory storage
+            http_port: 0,
+            openai_base_url: "..".into(),
+            storage: StorageKind::Memory,
+            pdf_ingest_mode: PdfIngestMode::LlmFirst,
+            ..Default::default()
+        }
+    }
+
+    /// Create a test configuration with local storage.
+    ///
+    /// This provides a ready-to-use configuration for testing scenarios
+    /// that require actual filesystem operations.
+    pub fn test_config_local() -> AppConfig {
+        let base = format!("/tmp/minne_test_storage_{}", uuid::Uuid::new_v4());
+        AppConfig {
+            openai_api_key: "test".into(),
+            surrealdb_address: "test".into(),
+            surrealdb_username: "test".into(),
+            surrealdb_password: "test".into(),
+            surrealdb_namespace: "test".into(),
+            surrealdb_database: "test".into(),
+            data_dir: base.into(),
+            http_port: 0,
+            openai_base_url: "..".into(),
+            storage: StorageKind::Local,
+            pdf_ingest_mode: PdfIngestMode::LlmFirst,
+            ..Default::default()
+        }
+    }
+
+    /// A specialized StorageManager for testing scenarios.
+    ///
+    /// This provides automatic setup for memory storage with proper isolation
+    /// and cleanup capabilities for test environments.
+    #[derive(Clone)]
+    pub struct TestStorageManager {
+        storage: StorageManager,
+        _temp_dir: Option<(String, std::path::PathBuf)>, // For local storage cleanup
+    }
+
+    impl TestStorageManager {
+        /// Create a new TestStorageManager with memory backend.
+        ///
+        /// This is the preferred method for unit tests as it provides
+        /// fast execution and complete isolation.
+        pub async fn new_memory() -> object_store::Result<Self> {
+            let cfg = test_config_memory();
+            let storage = StorageManager::new(&cfg).await?;
+
+            Ok(Self {
+                storage,
+                _temp_dir: None,
+            })
+        }
+
+        /// Create a new TestStorageManager with local filesystem backend.
+        ///
+        /// This method creates a temporary directory that will be automatically
+        /// cleaned up when the TestStorageManager is dropped.
+        pub async fn new_local() -> object_store::Result<Self> {
+            let cfg = test_config_local();
+            let storage = StorageManager::new(&cfg).await?;
+            let resolved = storage
+                .local_base_path()
+                .map(|path| (cfg.data_dir.clone(), path.to_path_buf()));
+
+            Ok(Self {
+                storage,
+                _temp_dir: resolved,
+            })
+        }
+
+        /// Create a TestStorageManager with custom configuration.
+        pub async fn with_config(cfg: &AppConfig) -> object_store::Result<Self> {
+            let storage = StorageManager::new(cfg).await?;
+            let temp_dir = if matches!(cfg.storage, StorageKind::Local) {
+                storage
+                    .local_base_path()
+                    .map(|path| (cfg.data_dir.clone(), path.to_path_buf()))
+            } else {
+                None
+            };
+
+            Ok(Self {
+                storage,
+                _temp_dir: temp_dir,
+            })
+        }
+
+        /// Get a reference to the underlying StorageManager.
+        pub fn storage(&self) -> &StorageManager {
+            &self.storage
+        }
+
+        /// Clone the underlying StorageManager.
+        pub fn clone_storage(&self) -> StorageManager {
+            self.storage.clone()
+        }
+
+        /// Store test data at the specified location.
+        pub async fn put(&self, location: &str, data: &[u8]) -> object_store::Result<()> {
+            self.storage.put(location, Bytes::from(data.to_vec())).await
+        }
+
+        /// Retrieve test data from the specified location.
+        pub async fn get(&self, location: &str) -> object_store::Result<Bytes> {
+            self.storage.get(location).await
+        }
+
+        /// Delete test data below the specified prefix.
+        pub async fn delete_prefix(&self, prefix: &str) -> object_store::Result<()> {
+            self.storage.delete_prefix(prefix).await
+        }
+
+        /// Check if test data exists at the specified location.
+        pub async fn exists(&self, location: &str) -> object_store::Result<bool> {
+            self.storage.exists(location).await
+        }
+
+        /// List all test objects below the specified prefix.
+        pub async fn list(
+            &self,
+            prefix: Option<&str>,
+        ) -> object_store::Result<Vec<object_store::ObjectMeta>> {
+            self.storage.list(prefix).await
+        }
+    }
+
+    impl Drop for TestStorageManager {
+        fn drop(&mut self) {
+            // Clean up temporary directories for local storage
+            if let Some((_, path)) = &self._temp_dir {
+                if path.exists() {
+                    let _ = std::fs::remove_dir_all(path);
+                }
+            }
+        }
+    }
+
+    /// Convenience macro for creating memory storage tests.
+    ///
+    /// This macro simplifies the creation of test storage with memory backend.
+    #[macro_export]
+    macro_rules! test_storage_memory {
+        () => {{
+            async move {
+                $crate::storage::store::testing::TestStorageManager::new_memory()
+                    .await
+                    .expect("Failed to create test memory storage")
+            }
+        }};
+    }
+
+    /// Convenience macro for creating local storage tests.
+    ///
+    /// This macro simplifies the creation of test storage with local filesystem backend.
+    #[macro_export]
+    macro_rules! test_storage_local {
+        () => {{
+            async move {
+                $crate::storage::store::testing::TestStorageManager::new_local()
+                    .await
+                    .expect("Failed to create test local storage")
+            }
+        }};
+    }
+}
```
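A hedged sketch of how a test elsewhere in the crate might invoke these macros; the `crate::`-rooted call path is an assumption based on `#[macro_export]`, and the macro expands to an async block that must be awaited, per its definition above.

```rust
#[tokio::test]
async fn uses_memory_macro() {
    // Await the async block the macro expands to.
    let test_storage = crate::test_storage_memory!().await;
    test_storage.put("a/b.txt", b"payload").await.expect("put");
    assert!(test_storage.exists("a/b.txt").await.expect("exists"));
}
```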
```diff
@@ -52,124 +438,6 @@ pub fn resolve_base_dir(cfg: &AppConfig) -> PathBuf {
     }
 }
 
-/// Build an object store rooted at the configured data directory.
-///
-/// This is the recommended way to obtain a store for logical object operations
-/// such as `put_bytes_at`, `get_bytes_at`, and `delete_prefix_at`.
-pub async fn build_store_root(cfg: &AppConfig) -> object_store::Result<DynStore> {
-    let base = resolve_base_dir(cfg);
-    build_store(&base, cfg).await
-}
-
-/// Write bytes to `file_name` within a filesystem `prefix` using the configured store.
-///
-/// Prefer [`put_bytes_at`] for location-based writes that do not need to compute
-/// a separate filesystem prefix.
-pub async fn put_bytes(
-    prefix: &Path,
-    file_name: &str,
-    data: Bytes,
-    cfg: &AppConfig,
-) -> object_store::Result<()> {
-    let store = build_store(prefix, cfg).await?;
-    let payload = object_store::PutPayload::from_bytes(data);
-    store.put(&ObjPath::from(file_name), payload).await?;
-    Ok(())
-}
-
-/// Write bytes to the provided logical object `location`, e.g. `"user/uuid/file"`.
-///
-/// The store root is taken from `AppConfig::data_dir` for the local backend.
-/// This performs an atomic write as guaranteed by `object_store`.
-pub async fn put_bytes_at(
-    location: &str,
-    data: Bytes,
-    cfg: &AppConfig,
-) -> object_store::Result<()> {
-    let store = build_store_root(cfg).await?;
-    let payload = object_store::PutPayload::from_bytes(data);
-    store.put(&ObjPath::from(location), payload).await?;
-    Ok(())
-}
-
-/// Read bytes from `file_name` within a filesystem `prefix` using the configured store.
-///
-/// Prefer [`get_bytes_at`] for location-based reads.
-pub async fn get_bytes(
-    prefix: &Path,
-    file_name: &str,
-    cfg: &AppConfig,
-) -> object_store::Result<Bytes> {
-    let store = build_store(prefix, cfg).await?;
-    let r = store.get(&ObjPath::from(file_name)).await?;
-    let b = r.bytes().await?;
-    Ok(b)
-}
-
-/// Read bytes from the provided logical object `location`.
-///
-/// Returns the full contents buffered in memory.
-pub async fn get_bytes_at(location: &str, cfg: &AppConfig) -> object_store::Result<Bytes> {
-    let store = build_store_root(cfg).await?;
-    let r = store.get(&ObjPath::from(location)).await?;
-    r.bytes().await
-}
-
-/// Get a streaming body for the provided logical object `location`.
-///
-/// Returns a fallible `BoxStream` of `Bytes`, suitable for use with
-/// `axum::body::Body::from_stream` to stream responses without buffering.
-pub async fn get_stream_at(
-    location: &str,
-    cfg: &AppConfig,
-) -> object_store::Result<BoxStream<'static, object_store::Result<Bytes>>> {
-    let store = build_store_root(cfg).await?;
-    let r = store.get(&ObjPath::from(location)).await?;
-    Ok(r.into_stream())
-}
-
-/// Delete all objects below the provided filesystem `prefix`.
-///
-/// This is a low-level variant for when a dedicated on-disk prefix is used for a
-/// particular object grouping. Prefer [`delete_prefix_at`] for location-based stores.
-pub async fn delete_prefix(prefix: &Path, cfg: &AppConfig) -> object_store::Result<()> {
-    let store = build_store(prefix, cfg).await?;
-    // list everything and delete
-    let locations = store.list(None).map_ok(|m| m.location).boxed();
-    store
-        .delete_stream(locations)
-        .try_collect::<Vec<_>>()
-        .await?;
-    // Best effort remove the directory itself
-    if tokio::fs::try_exists(prefix).await.unwrap_or(false) {
-        let _ = tokio::fs::remove_dir_all(prefix).await;
-    }
-    Ok(())
-}
-
-/// Delete all objects below the provided logical object `prefix`, e.g. `"user/uuid/"`.
-///
-/// After deleting, attempts a best-effort cleanup of the now-empty directory on disk
-/// when using the local backend.
-pub async fn delete_prefix_at(prefix: &str, cfg: &AppConfig) -> object_store::Result<()> {
-    let store = build_store_root(cfg).await?;
-    let prefix_path = ObjPath::from(prefix);
-    let locations = store
-        .list(Some(&prefix_path))
-        .map_ok(|m| m.location)
-        .boxed();
-    store
-        .delete_stream(locations)
-        .try_collect::<Vec<_>>()
-        .await?;
-    // Best effort remove empty directory on disk for local storage
-    let base_dir = resolve_base_dir(cfg).join(prefix);
-    if tokio::fs::try_exists(&base_dir).await.unwrap_or(false) {
-        let _ = tokio::fs::remove_dir_all(&base_dir).await;
-    }
-    Ok(())
-}
-
 /// Split an absolute filesystem path into `(parent_dir, file_name)`.
 pub fn split_abs_path(path: &str) -> AnyResult<(PathBuf, String)> {
     let pb = PathBuf::from(path);
```
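For call sites still on the removed helpers, the mapping onto `StorageManager` methods can be inferred from this compare view's call-site changes; the sketch below is that inference, not repo documentation.

```rust
use bytes::Bytes;

// Inferred mapping (locations invented for illustration):
//   put_bytes_at(&loc, data, &cfg)   ->  storage.put(&loc, data)
//   get_bytes_at(&loc, &cfg)         ->  storage.get(&loc)
//   get_stream_at(&loc, &cfg)        ->  storage.get_stream(&loc)
//   delete_prefix_at(&prefix, &cfg)  ->  storage.delete_prefix(&prefix)
async fn migrated_call_site(storage: &StorageManager) -> object_store::Result<()> {
    storage
        .put("user/uuid/file.txt", Bytes::from_static(b"hello"))
        .await?;
    let _bytes = storage.get("user/uuid/file.txt").await?;
    storage.delete_prefix("user/uuid/").await
}
```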
```diff
@@ -198,7 +466,6 @@ mod tests {
     use super::*;
     use crate::utils::config::{PdfIngestMode::LlmFirst, StorageKind};
     use bytes::Bytes;
-    use futures::TryStreamExt;
     use uuid::Uuid;
 
     fn test_config(root: &str) -> AppConfig {
@@ -218,68 +485,353 @@ mod tests {
         }
     }
 
+    fn test_config_memory() -> AppConfig {
+        AppConfig {
+            openai_api_key: "test".into(),
+            surrealdb_address: "test".into(),
+            surrealdb_username: "test".into(),
+            surrealdb_password: "test".into(),
+            surrealdb_namespace: "test".into(),
+            surrealdb_database: "test".into(),
+            data_dir: "/tmp/unused".into(), // Ignored for memory storage
+            http_port: 0,
+            openai_base_url: "..".into(),
+            storage: StorageKind::Memory,
+            pdf_ingest_mode: LlmFirst,
+            ..Default::default()
+        }
+    }
+
     #[tokio::test]
-    async fn test_build_store_root_creates_base() {
-        let base = format!("/tmp/minne_store_test_{}", Uuid::new_v4());
-        let cfg = test_config(&base);
-        let _ = build_store_root(&cfg).await.expect("build store root");
-        assert!(tokio::fs::try_exists(&base).await.unwrap_or(false));
-    }
+    async fn test_storage_manager_memory_basic_operations() {
+        let cfg = test_config_memory();
+        let storage = StorageManager::new(&cfg)
+            .await
+            .expect("create storage manager");
+        assert!(storage.local_base_path().is_none());
+
+        let location = "test/data/file.txt";
+        let data = b"test data for storage manager";
+
+        // Test put and get
+        storage
+            .put(location, Bytes::from(data.to_vec()))
+            .await
+            .expect("put");
+        let retrieved = storage.get(location).await.expect("get");
+        assert_eq!(retrieved.as_ref(), data);
+
+        // Test exists
+        assert!(storage.exists(location).await.expect("exists check"));
+
+        // Test delete
+        storage.delete_prefix("test/data/").await.expect("delete");
+        assert!(!storage
+            .exists(location)
+            .await
+            .expect("exists check after delete"));
+    }
+
+    #[tokio::test]
+    async fn test_storage_manager_local_basic_operations() {
+        let base = format!("/tmp/minne_storage_test_{}", Uuid::new_v4());
+        let cfg = test_config(&base);
+        let storage = StorageManager::new(&cfg)
+            .await
+            .expect("create storage manager");
+        let resolved_base = storage
+            .local_base_path()
+            .expect("resolved base dir")
+            .to_path_buf();
+        assert_eq!(resolved_base, PathBuf::from(&base));
+
+        let location = "test/data/file.txt";
+        let data = b"test data for local storage";
+
+        // Test put and get
+        storage
+            .put(location, Bytes::from(data.to_vec()))
+            .await
+            .expect("put");
+        let retrieved = storage.get(location).await.expect("get");
+        assert_eq!(retrieved.as_ref(), data);
+
+        let object_dir = resolved_base.join("test/data");
+        tokio::fs::metadata(&object_dir)
+            .await
+            .expect("object directory exists after write");
+
+        // Test exists
+        assert!(storage.exists(location).await.expect("exists check"));
+
+        // Test delete
+        storage.delete_prefix("test/data/").await.expect("delete");
+        assert!(!storage
+            .exists(location)
+            .await
+            .expect("exists check after delete"));
+        assert!(
+            tokio::fs::metadata(&object_dir).await.is_err(),
+            "object directory should be removed"
+        );
+        tokio::fs::metadata(&resolved_base)
+            .await
+            .expect("base directory remains intact");
+
+        // Clean up
+        let _ = tokio::fs::remove_dir_all(&base).await;
+    }
 
     #[tokio::test]
-    async fn test_put_get_bytes_at_and_delete_prefix_at() {
-        let base = format!("/tmp/minne_store_test_{}", Uuid::new_v4());
-        let cfg = test_config(&base);
-
-        let location_prefix = format!("{}/{}", "user1", Uuid::new_v4());
-        let file_name = "file.txt";
-        let location = format!("{}/{}", &location_prefix, file_name);
-        let payload = Bytes::from_static(b"hello world");
-
-        put_bytes_at(&location, payload.clone(), &cfg)
-            .await
-            .expect("put");
-        let got = get_bytes_at(&location, &cfg).await.expect("get");
-        assert_eq!(got.as_ref(), payload.as_ref());
-
-        // Delete the whole prefix and ensure retrieval fails
-        delete_prefix_at(&location_prefix, &cfg)
-            .await
-            .expect("delete prefix");
-        assert!(get_bytes_at(&location, &cfg).await.is_err());
-
-        let _ = tokio::fs::remove_dir_all(&base).await;
-    }
+    async fn test_storage_manager_memory_persistence() {
+        let cfg = test_config_memory();
+        let storage = StorageManager::new(&cfg)
+            .await
+            .expect("create storage manager");
+
+        let location = "persistence/test.txt";
+        let data1 = b"first data";
+        let data2 = b"second data";
+
+        // Put first data
+        storage
+            .put(location, Bytes::from(data1.to_vec()))
+            .await
+            .expect("put first");
+
+        // Retrieve and verify first data
+        let retrieved1 = storage.get(location).await.expect("get first");
+        assert_eq!(retrieved1.as_ref(), data1);
+
+        // Overwrite with second data
+        storage
+            .put(location, Bytes::from(data2.to_vec()))
+            .await
+            .expect("put second");
+
+        // Retrieve and verify second data
+        let retrieved2 = storage.get(location).await.expect("get second");
+        assert_eq!(retrieved2.as_ref(), data2);
+
+        // Data persists across multiple operations using the same StorageManager
+        assert_ne!(retrieved1.as_ref(), retrieved2.as_ref());
+    }
 
     #[tokio::test]
-    async fn test_get_stream_at() {
-        let base = format!("/tmp/minne_store_test_{}", Uuid::new_v4());
-        let cfg = test_config(&base);
-
-        let location = format!("{}/{}/stream.bin", "user2", Uuid::new_v4());
-        let content = vec![7u8; 32 * 1024]; // 32KB payload
-
-        put_bytes_at(&location, Bytes::from(content.clone()), &cfg)
-            .await
-            .expect("put");
-
-        let stream = get_stream_at(&location, &cfg).await.expect("stream");
-        let combined: Vec<u8> = stream
-            .map_ok(|chunk| chunk.to_vec())
-            .try_fold(Vec::new(), |mut acc, mut chunk| async move {
-                acc.append(&mut chunk);
-                Ok(acc)
-            })
-            .await
-            .expect("collect");
-
-        assert_eq!(combined, content);
-
-        delete_prefix_at(&split_object_path(&location).unwrap().0, &cfg)
-            .await
-            .ok();
-
-        let _ = tokio::fs::remove_dir_all(&base).await;
-    }
+    async fn test_storage_manager_list_operations() {
+        let cfg = test_config_memory();
+        let storage = StorageManager::new(&cfg)
+            .await
+            .expect("create storage manager");
+
+        // Create multiple files
+        let files = vec![
+            ("dir1/file1.txt", b"content1"),
+            ("dir1/file2.txt", b"content2"),
+            ("dir2/file3.txt", b"content3"),
+        ];
+
+        for (location, data) in &files {
+            storage
+                .put(location, Bytes::from(data.to_vec()))
+                .await
+                .expect("put");
+        }
+
+        // Test listing without prefix
+        let all_files = storage.list(None).await.expect("list all");
+        assert_eq!(all_files.len(), 3);
+
+        // Test listing with prefix
+        let dir1_files = storage.list(Some("dir1/")).await.expect("list dir1");
+        assert_eq!(dir1_files.len(), 2);
+        assert!(dir1_files
+            .iter()
+            .any(|meta| meta.location.as_ref().contains("file1.txt")));
+        assert!(dir1_files
+            .iter()
+            .any(|meta| meta.location.as_ref().contains("file2.txt")));
+
+        // Test listing non-existent prefix
+        let empty_files = storage
+            .list(Some("nonexistent/"))
+            .await
+            .expect("list nonexistent");
+        assert_eq!(empty_files.len(), 0);
+    }
+
+    #[tokio::test]
+    async fn test_storage_manager_stream_operations() {
+        let cfg = test_config_memory();
+        let storage = StorageManager::new(&cfg)
+            .await
+            .expect("create storage manager");
+
+        let location = "stream/test.bin";
+        let content = vec![42u8; 1024 * 64]; // 64KB of data
+
+        // Put large data
+        storage
+            .put(location, Bytes::from(content.clone()))
+            .await
+            .expect("put large data");
+
+        // Get as stream
+        let mut stream = storage.get_stream(location).await.expect("get stream");
+        let mut collected = Vec::new();
+
+        while let Some(chunk) = stream.next().await {
+            let chunk = chunk.expect("stream chunk");
+            collected.extend_from_slice(&chunk);
+        }
+
+        assert_eq!(collected, content);
+    }
+
+    #[tokio::test]
+    async fn test_storage_manager_with_custom_backend() {
+        use object_store::memory::InMemory;
+
+        // Create custom memory backend
+        let custom_store = InMemory::new();
+        let storage = StorageManager::with_backend(Arc::new(custom_store), StorageKind::Memory);
+
+        let location = "custom/test.txt";
+        let data = b"custom backend test";
+
+        // Test operations with custom backend
+        storage
+            .put(location, Bytes::from(data.to_vec()))
+            .await
+            .expect("put");
+        let retrieved = storage.get(location).await.expect("get");
+        assert_eq!(retrieved.as_ref(), data);
+
+        assert!(storage.exists(location).await.expect("exists"));
+        assert_eq!(*storage.backend_kind(), StorageKind::Memory);
+    }
+
+    #[tokio::test]
+    async fn test_storage_manager_error_handling() {
+        let cfg = test_config_memory();
+        let storage = StorageManager::new(&cfg)
+            .await
+            .expect("create storage manager");
+
+        // Test getting non-existent file
+        let result = storage.get("nonexistent.txt").await;
+        assert!(result.is_err());
+
+        // Test checking existence of non-existent file
+        let exists = storage
+            .exists("nonexistent.txt")
+            .await
+            .expect("exists check");
+        assert!(!exists);
+
+        // Test listing with invalid location (should not panic)
+        let _result = storage.get("").await;
+        // This may or may not error depending on the backend implementation
+        // The important thing is that it doesn't panic
+    }
+
+    // TestStorageManager tests
+    #[tokio::test]
+    async fn test_test_storage_manager_memory() {
+        let test_storage = testing::TestStorageManager::new_memory()
+            .await
+            .expect("create test storage");
+
+        let location = "test/storage/file.txt";
+        let data = b"test data with TestStorageManager";
+
+        // Test put and get
+        test_storage.put(location, data).await.expect("put");
+        let retrieved = test_storage.get(location).await.expect("get");
+        assert_eq!(retrieved.as_ref(), data);
+
+        // Test existence check
+        assert!(test_storage.exists(location).await.expect("exists"));
+
+        // Test list
+        let files = test_storage
+            .list(Some("test/storage/"))
+            .await
+            .expect("list");
+        assert_eq!(files.len(), 1);
+
+        // Test delete
+        test_storage
+            .delete_prefix("test/storage/")
+            .await
+            .expect("delete");
+        assert!(!test_storage
+            .exists(location)
+            .await
+            .expect("exists after delete"));
+    }
+
+    #[tokio::test]
+    async fn test_test_storage_manager_local() {
+        let test_storage = testing::TestStorageManager::new_local()
+            .await
+            .expect("create test storage");
+
+        let location = "test/local/file.txt";
+        let data = b"test data with local TestStorageManager";
+
+        // Test put and get
+        test_storage.put(location, data).await.expect("put");
+        let retrieved = test_storage.get(location).await.expect("get");
+        assert_eq!(retrieved.as_ref(), data);
+
+        // Test existence check
+        assert!(test_storage.exists(location).await.expect("exists"));
+
+        // The storage should be automatically cleaned up when test_storage is dropped
+    }
+
+    #[tokio::test]
+    async fn test_test_storage_manager_isolation() {
+        let storage1 = testing::TestStorageManager::new_memory()
+            .await
+            .expect("create test storage 1");
+        let storage2 = testing::TestStorageManager::new_memory()
+            .await
+            .expect("create test storage 2");
+
+        let location = "isolation/test.txt";
+        let data1 = b"storage 1 data";
+        let data2 = b"storage 2 data";
+
+        // Put different data in each storage
+        storage1.put(location, data1).await.expect("put storage 1");
+        storage2.put(location, data2).await.expect("put storage 2");
+
+        // Verify isolation
+        let retrieved1 = storage1.get(location).await.expect("get storage 1");
+        let retrieved2 = storage2.get(location).await.expect("get storage 2");
+
+        assert_eq!(retrieved1.as_ref(), data1);
+        assert_eq!(retrieved2.as_ref(), data2);
+        assert_ne!(retrieved1.as_ref(), retrieved2.as_ref());
+    }
+
+    #[tokio::test]
+    async fn test_test_storage_manager_config() {
+        let cfg = testing::test_config_memory();
+        let test_storage = testing::TestStorageManager::with_config(&cfg)
+            .await
+            .expect("create test storage with config");
+
+        let location = "config/test.txt";
+        let data = b"test data with custom config";
+
+        test_storage.put(location, data).await.expect("put");
+        let retrieved = test_storage.get(location).await.expect("get");
+        assert_eq!(retrieved.as_ref(), data);
+
+        // Verify it's using memory backend
+        assert_eq!(*test_storage.storage().backend_kind(), StorageKind::Memory);
+    }
 }
```
```diff
@@ -1,4 +1,5 @@
 use axum_typed_multipart::FieldData;
+use bytes;
 use mime_guess::from_path;
 use object_store::Error as ObjectStoreError;
 use sha2::{Digest, Sha256};
@@ -13,9 +14,8 @@ use uuid::Uuid;
 
 use crate::{
     error::AppError,
-    storage::{db::SurrealDbClient, store},
+    storage::{db::SurrealDbClient, store, store::StorageManager},
     stored_object,
-    utils::config::AppConfig,
 };
 
 #[derive(Error, Debug)]
@@ -51,54 +51,6 @@ stored_object!(FileInfo, "file", {
 });
```
```diff
 impl FileInfo {
-    pub async fn new(
-        field_data: FieldData<NamedTempFile>,
-        db_client: &SurrealDbClient,
-        user_id: &str,
-        config: &AppConfig,
-    ) -> Result<Self, FileError> {
-        let file = field_data.contents;
-        let file_name = field_data
-            .metadata
-            .file_name
-            .ok_or(FileError::MissingFileName)?;
-
-        // Calculate SHA256
-        let sha256 = Self::get_sha(&file).await?;
-
-        // Early return if file already exists
-        match Self::get_by_sha(&sha256, db_client).await {
-            Ok(existing_file) => {
-                info!("File already exists with SHA256: {}", sha256);
-                return Ok(existing_file);
-            }
-            Err(FileError::FileNotFound(_)) => (), // Expected case for new files
-            Err(e) => return Err(e), // Propagate unexpected errors
-        }
-
-        // Generate UUID and prepare paths
-        let uuid = Uuid::new_v4();
-        let sanitized_file_name = Self::sanitize_file_name(&file_name);
-
-        let now = Utc::now();
-        // Create new FileInfo instance
-        let file_info = Self {
-            id: uuid.to_string(),
-            created_at: now,
-            updated_at: now,
-            file_name,
-            sha256,
-            path: Self::persist_file(&uuid, file, &sanitized_file_name, user_id, config).await?,
-            mime_type: Self::guess_mime_type(Path::new(&sanitized_file_name)),
-            user_id: user_id.to_string(),
-        };
-
-        // Store in database
-        db_client.store_item(file_info.clone()).await?;
-
-        Ok(file_info)
-    }
-
     /// Guesses the MIME type based on the file extension.
     ///
     /// # Arguments
@@ -167,36 +119,6 @@ impl FileInfo {
         }
     }
 
-    /// Persists the file under the logical location `{user_id}/{uuid}/{file_name}`.
-    ///
-    /// # Arguments
-    /// * `uuid` - The UUID of the file.
-    /// * `file` - The temporary file to persist.
-    /// * `file_name` - The sanitized file name.
-    /// * `user_id` - The user id.
-    /// * `config` - Application configuration containing the data directory path.
-    ///
-    /// # Returns
-    /// * `Result<String, FileError>` - The logical object location or an error.
-    async fn persist_file(
-        uuid: &Uuid,
-        file: NamedTempFile,
-        file_name: &str,
-        user_id: &str,
-        config: &AppConfig,
-    ) -> Result<String, FileError> {
-        // Logical object location relative to the store root
-        let location = format!("{}/{}/{}", user_id, uuid, file_name);
-        info!("Persisting to object location: {}", location);
-
-        let bytes = tokio::fs::read(file.path()).await?;
-        store::put_bytes_at(&location, bytes.into(), config)
-            .await
-            .map_err(FileError::from)?;
-
-        Ok(location)
-    }
-
     /// Retrieves a `FileInfo` by SHA256.
     ///
     /// # Arguments
@@ -215,41 +137,6 @@ impl FileInfo {
             .ok_or(FileError::FileNotFound(sha256.to_string()))
     }
 
-    /// Removes FileInfo from database and file from disk
-    ///
-    /// # Arguments
-    /// * `id` - Id of the FileInfo
-    /// * `db_client` - Reference to SurrealDbClient
-    ///
-    /// # Returns
-    /// `Result<(), FileError>`
-    pub async fn delete_by_id(
-        id: &str,
-        db_client: &SurrealDbClient,
-        config: &AppConfig,
-    ) -> Result<(), AppError> {
-        // Get the FileInfo from the database
-        let Some(file_info) = db_client.get_item::<FileInfo>(id).await? else {
-            return Ok(());
-        };
-
-        // Remove the object's parent prefix in the object store
-        let (parent_prefix, _file_name) = store::split_object_path(&file_info.path)
-            .map_err(|e| AppError::from(anyhow::anyhow!(e)))?;
-        store::delete_prefix_at(&parent_prefix, config)
-            .await
-            .map_err(|e| AppError::from(anyhow::anyhow!(e)))?;
-        info!(
-            "Removed object prefix {} and its contents via object_store",
-            parent_prefix
-        );
-
-        // Delete the FileInfo from the database
-        db_client.delete_item::<FileInfo>(id).await?;
-
-        Ok(())
-    }
-
     /// Retrieves a `FileInfo` by its ID.
     ///
    /// # Arguments
```
```diff
@@ -265,34 +152,168 @@ impl FileInfo {
             Err(e) => Err(FileError::SurrealError(e)),
         }
     }
 
+    /// Create a new FileInfo using StorageManager for persistent storage operations.
+    ///
+    /// # Arguments
+    /// * `field_data` - The uploaded file data
+    /// * `db_client` - Reference to the SurrealDbClient
+    /// * `user_id` - The user ID
+    /// * `storage` - A StorageManager instance for storage operations
+    ///
+    /// # Returns
+    /// * `Result<Self, FileError>` - The created FileInfo or an error
+    pub async fn new_with_storage(
+        field_data: FieldData<NamedTempFile>,
+        db_client: &SurrealDbClient,
+        user_id: &str,
+        storage: &StorageManager,
+    ) -> Result<Self, FileError> {
+        let file = field_data.contents;
+        let file_name = field_data
+            .metadata
+            .file_name
+            .ok_or(FileError::MissingFileName)?;
+        let original_file_name = file_name.clone();
+
+        // Calculate SHA256
+        let sha256 = Self::get_sha(&file).await?;
+
+        // Early return if file already exists
+        match Self::get_by_sha(&sha256, db_client).await {
+            Ok(existing_file) => {
+                info!("File already exists with SHA256: {}", sha256);
+                return Ok(existing_file);
+            }
+            Err(FileError::FileNotFound(_)) => (), // Expected case for new files
+            Err(e) => return Err(e), // Propagate unexpected errors
+        }
+
+        // Generate UUID and prepare paths
+        let uuid = Uuid::new_v4();
+        let sanitized_file_name = Self::sanitize_file_name(&file_name);
+        let now = Utc::now();
+
+        let path =
+            Self::persist_file_with_storage(&uuid, file, &sanitized_file_name, user_id, storage)
+                .await?;
+
+        // Create FileInfo struct
+        let file_info = FileInfo {
+            id: uuid.to_string(),
+            user_id: user_id.to_string(),
+            sha256,
+            file_name: original_file_name,
+            path,
+            mime_type: Self::guess_mime_type(Path::new(&file_name)),
+            created_at: now,
+            updated_at: now,
+        };
+
+        // Store in database
+        db_client
+            .store_item(file_info.clone())
+            .await
+            .map_err(FileError::SurrealError)?;
+
+        Ok(file_info)
+    }
+
+    /// Delete a FileInfo by ID using StorageManager for storage operations.
+    ///
+    /// # Arguments
+    /// * `id` - ID of the FileInfo
+    /// * `db_client` - Reference to SurrealDbClient
+    /// * `storage` - A StorageManager instance for storage operations
+    ///
+    /// # Returns
+    /// * `Result<(), AppError>` - Success or error
+    pub async fn delete_by_id_with_storage(
+        id: &str,
+        db_client: &SurrealDbClient,
+        storage: &StorageManager,
+    ) -> Result<(), AppError> {
+        // Get the FileInfo from the database
+        let Some(file_info) = db_client.get_item::<FileInfo>(id).await? else {
+            return Ok(());
+        };
+
+        // Remove the object's parent prefix in the object store
+        let (parent_prefix, _file_name) = store::split_object_path(&file_info.path)
+            .map_err(|e| AppError::from(anyhow::anyhow!(e)))?;
+        storage
+            .delete_prefix(&parent_prefix)
+            .await
+            .map_err(|e| AppError::from(anyhow::anyhow!(e)))?;
+        info!(
+            "Removed object prefix {} and its contents via StorageManager",
+            parent_prefix
+        );
+
+        // Delete the FileInfo from the database
+        db_client.delete_item::<FileInfo>(id).await?;
+
+        Ok(())
+    }
+
+    /// Retrieve file content using StorageManager for storage operations.
+    ///
+    /// # Arguments
+    /// * `storage` - A StorageManager instance for storage operations
+    ///
+    /// # Returns
+    /// * `Result<bytes::Bytes, AppError>` - The file content or an error
+    pub async fn get_content_with_storage(
+        &self,
+        storage: &StorageManager,
+    ) -> Result<bytes::Bytes, AppError> {
+        storage
+            .get(&self.path)
+            .await
+            .map_err(|e: object_store::Error| AppError::from(anyhow::anyhow!(e)))
+    }
+
+    /// Persist file to storage using StorageManager.
+    ///
+    /// # Arguments
+    /// * `uuid` - The UUID for the file
+    /// * `file` - The temporary file to persist
+    /// * `file_name` - The name of the file
+    /// * `user_id` - The user ID
+    /// * `storage` - A StorageManager instance for storage operations
+    ///
+    /// # Returns
+    /// * `Result<String, FileError>` - The logical object location or an error.
+    async fn persist_file_with_storage(
+        uuid: &Uuid,
+        file: NamedTempFile,
+        file_name: &str,
+        user_id: &str,
+        storage: &StorageManager,
+    ) -> Result<String, FileError> {
+        // Logical object location relative to the store root
+        let location = format!("{}/{}/{}", user_id, uuid, file_name);
+        info!("Persisting to object location: {}", location);
+
+        let bytes = tokio::fs::read(file.path()).await?;
+        storage
+            .put(&location, bytes.into())
+            .await
+            .map_err(FileError::from)?;
+
+        Ok(location)
+    }
 }
```
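A hedged call-site sketch of the new constructor pair; the handler shape and literal user id are invented, only the `FileInfo` methods and error conversion come from this diff.

```rust
use axum_typed_multipart::FieldData;
use tempfile::NamedTempFile;

// Hypothetical upload handler: dedupes by SHA256, persists under
// `{user_id}/{uuid}/{file_name}`, records the row, then reads it back.
async fn handle_upload(
    field_data: FieldData<NamedTempFile>,
    db: &SurrealDbClient,
    storage: &StorageManager,
) -> Result<bytes::Bytes, AppError> {
    let info = FileInfo::new_with_storage(field_data, db, "user-123", storage)
        .await
        .map_err(AppError::from)?;
    // Retrieve the stored bytes through the same manager.
    info.get_content_with_storage(storage).await
}
```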
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::utils::config::{AppConfig, PdfIngestMode::LlmFirst, StorageKind};
|
||||
use crate::storage::store::testing::TestStorageManager;
|
||||
use axum::http::HeaderMap;
|
||||
use axum_typed_multipart::FieldMetadata;
|
||||
use std::io::Write;
|
||||
use std::{io::Write, path::Path};
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
fn test_config(data_dir: &str) -> AppConfig {
|
||||
AppConfig {
|
||||
data_dir: data_dir.to_string(),
|
||||
openai_api_key: "test_key".to_string(),
|
||||
surrealdb_address: "test_address".to_string(),
|
||||
surrealdb_username: "test_user".to_string(),
|
||||
surrealdb_password: "test_pass".to_string(),
|
||||
surrealdb_namespace: "test_ns".to_string(),
|
||||
surrealdb_database: "test_db".to_string(),
|
||||
http_port: 3000,
|
||||
openai_base_url: "..".to_string(),
|
||||
storage: StorageKind::Local,
|
||||
pdf_ingest_mode: LlmFirst,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a test temporary file with the given content
|
||||
fn create_test_file(content: &[u8], file_name: &str) -> FieldData<NamedTempFile> {
|
||||
let mut temp_file = NamedTempFile::new().expect("Failed to create temp file");
|
||||
@@ -316,33 +337,39 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_fileinfo_create_read_delete() {
|
||||
// Setup in-memory database for testing
|
||||
async fn test_fileinfo_create_read_delete_with_storage_manager() {
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
db.apply_migrations().await.unwrap();
|
||||
|
||||
// Create a test file
|
||||
let content = b"This is a test file for cross-filesystem operations";
|
||||
let file_name = "cross_fs_test.txt";
|
||||
let content = b"This is a test file for StorageManager operations";
|
||||
let file_name = "storage_manager_test.txt";
|
||||
let field_data = create_test_file(content, file_name);
|
||||
|
||||
// Create a FileInfo instance with data_dir in /tmp
|
||||
// Create test storage manager (memory backend)
|
||||
let test_storage = store::testing::TestStorageManager::new_memory()
|
||||
.await
|
||||
.expect("Failed to create test storage manager");
|
||||
|
||||
// Create a FileInfo instance with storage manager
|
||||
let user_id = "test_user";
|
||||
let config = test_config("/tmp/minne_test_data");
|
||||
|
||||
// Test file creation
|
||||
let file_info = FileInfo::new(field_data, &db, user_id, &config)
|
||||
.await
|
||||
.expect("Failed to create file across filesystems");
|
||||
// Test file creation with StorageManager
|
||||
let file_info =
|
||||
FileInfo::new_with_storage(field_data, &db, user_id, test_storage.storage())
|
||||
.await
|
||||
.expect("Failed to create file with StorageManager");
|
||||
assert_eq!(file_info.file_name, file_name);
|
||||
|
||||
// Verify the file exists via object_store and has correct content
|
||||
let bytes = store::get_bytes_at(&file_info.path, &config)
|
||||
// Verify the file exists via StorageManager and has correct content
|
||||
let bytes = file_info
|
||||
.get_content_with_storage(test_storage.storage())
|
||||
.await
|
||||
.expect("Failed to read file content via object_store");
|
||||
assert_eq!(bytes, content.as_slice());
|
||||
.expect("Failed to read file content via StorageManager");
|
||||
assert_eq!(bytes.as_ref(), content);
|
||||
|
||||
// Test file reading
|
||||
let retrieved = FileInfo::get_by_id(&file_info.id, &db)
|
||||
@@ -350,51 +377,89 @@ mod tests {
|
||||
.expect("Failed to retrieve file info");
|
||||
assert_eq!(retrieved.id, file_info.id);
|
||||
assert_eq!(retrieved.sha256, file_info.sha256);
|
||||
assert_eq!(retrieved.file_name, file_name);
|
||||
|
||||
// Test file deletion
|
||||
FileInfo::delete_by_id(&file_info.id, &db, &config)
|
||||
// Test file deletion with StorageManager
|
||||
FileInfo::delete_by_id_with_storage(&file_info.id, &db, test_storage.storage())
|
||||
.await
|
||||
.expect("Failed to delete file");
|
||||
assert!(
|
||||
store::get_bytes_at(&file_info.path, &config).await.is_err(),
|
||||
"File should be deleted"
|
||||
);
|
||||
.expect("Failed to delete file with StorageManager");
|
||||
|
||||
// Clean up the test directory
|
||||
let _ = tokio::fs::remove_dir_all(&config.data_dir).await;
|
||||
let deleted_result = file_info
|
||||
.get_content_with_storage(test_storage.storage())
|
||||
.await;
|
||||
assert!(deleted_result.is_err(), "File should be deleted");
|
||||
|
||||
// No cleanup needed - TestStorageManager handles it automatically
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_fileinfo_duplicate_detection() {
|
||||
// Setup in-memory database for testing
|
||||
async fn test_fileinfo_preserves_original_filename_and_sanitizes_path() {
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
db.apply_migrations().await.unwrap();
|
||||
|
||||
// Create a test file
|
||||
let content = b"This is a test file for cross-filesystem duplicate detection";
|
||||
let file_name = "cross_fs_duplicate.txt";
|
||||
let content = b"filename sanitization";
|
||||
let original_name = "Complex name (1).txt";
|
||||
let expected_sanitized = "Complex_name__1_.txt";
|
||||
let field_data = create_test_file(content, original_name);
|
||||
|
||||
let test_storage = store::testing::TestStorageManager::new_memory()
|
||||
.await
|
||||
.expect("Failed to create test storage manager");
|
||||
|
||||
let file_info =
|
||||
FileInfo::new_with_storage(field_data, &db, "sanitized_user", test_storage.storage())
|
||||
.await
|
||||
.expect("Failed to create file via storage manager");
|
||||
|
||||
assert_eq!(file_info.file_name, original_name);
|
||||
|
||||
let stored_name = Path::new(&file_info.path)
|
||||
.file_name()
|
||||
.and_then(|name| name.to_str())
|
||||
.expect("stored name");
|
||||
assert_eq!(stored_name, expected_sanitized);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_fileinfo_duplicate_detection_with_storage_manager() {
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database)
|
||||
.await
|
||||
.expect("Failed to start in-memory surrealdb");
|
||||
db.apply_migrations().await.unwrap();
|
||||
|
||||
let content = b"This is a test file for StorageManager duplicate detection";
|
||||
let file_name = "storage_manager_duplicate.txt";
|
||||
let field_data = create_test_file(content, file_name);
|
||||
|
||||
// Create a FileInfo instance with data_dir in /tmp
|
||||
// Create test storage manager
|
||||
let test_storage = store::testing::TestStorageManager::new_memory()
|
||||
.await
|
||||
.expect("Failed to create test storage manager");
|
||||
|
||||
// Create a FileInfo instance with storage manager
|
||||
let user_id = "test_user";
|
||||
let config = test_config("/tmp/minne_test_data");
|
||||
|
||||
// Store the original file
|
||||
let original_file_info = FileInfo::new(field_data, &db, user_id, &config)
|
||||
.await
|
||||
.expect("Failed to create original file");
|
||||
let original_file_info =
|
||||
FileInfo::new_with_storage(field_data, &db, user_id, test_storage.storage())
|
||||
.await
|
||||
.expect("Failed to create original file with StorageManager");
|
||||
|
||||
// Create another file with the same content but different name
|
||||
let duplicate_name = "cross_fs_duplicate_2.txt";
|
||||
let duplicate_name = "storage_manager_duplicate_2.txt";
|
||||
let field_data2 = create_test_file(content, duplicate_name);
|
||||
|
||||
// The system should detect it's the same file and return the original FileInfo
|
||||
let duplicate_file_info = FileInfo::new(field_data2, &db, user_id, &config)
|
||||
.await
|
||||
.expect("Failed to process duplicate file");
|
||||
let duplicate_file_info =
|
||||
FileInfo::new_with_storage(field_data2, &db, user_id, test_storage.storage())
|
||||
.await
|
||||
.expect("Failed to process duplicate file with StorageManager");
|
||||
|
||||
// Verify duplicate detection worked
|
||||
assert_eq!(duplicate_file_info.id, original_file_info.id);
|
||||
@@ -402,34 +467,48 @@ mod tests {
|
||||
assert_eq!(duplicate_file_info.file_name, file_name);
|
||||
assert_ne!(duplicate_file_info.file_name, duplicate_name);
|
||||
|
||||
// Clean up
|
||||
FileInfo::delete_by_id(&original_file_info.id, &db, &config)
|
||||
// Verify both files have the same content (they should point to the same file)
|
||||
let original_content = original_file_info
|
||||
.get_content_with_storage(test_storage.storage())
|
||||
.await
|
||||
.expect("Failed to delete file");
|
||||
let _ = tokio::fs::remove_dir_all(&config.data_dir).await;
|
||||
.unwrap();
|
||||
let duplicate_content = duplicate_file_info
|
||||
.get_content_with_storage(test_storage.storage())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(original_content.as_ref(), content);
|
||||
assert_eq!(duplicate_content.as_ref(), content);
|
||||
|
||||
// Clean up
|
||||
FileInfo::delete_by_id_with_storage(&original_file_info.id, &db, test_storage.storage())
|
||||
.await
|
||||
.expect("Failed to delete original file with StorageManager");
|
||||
}
|
||||
|
||||
    #[tokio::test]
    async fn test_file_creation() {
        // Setup in-memory database for testing
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");

        // Create a test file
        let content = b"This is a test file content";
        let file_name = "test_file.txt";
        let field_data = create_test_file(content, file_name);

-       // Create a FileInfo instance
+       // Create a FileInfo instance with StorageManager
        let user_id = "test_user";
-       let config = test_config("./data");
-       let file_info = FileInfo::new(field_data, &db, user_id, &config).await;
+       let test_storage = TestStorageManager::new_memory()
+           .await
+           .expect("create test storage manager");
+       let file_info =
+           FileInfo::new_with_storage(field_data, &db, user_id, test_storage.storage()).await;

-       // We can't fully test persistence to disk in unit tests,
-       // but we can verify the database record was created
+       // Verify the FileInfo was created successfully
        assert!(file_info.is_ok());
        let file_info = file_info.unwrap();

@@ -459,33 +538,39 @@ mod tests {

    #[tokio::test]
    async fn test_file_duplicate_detection() {
        // Setup in-memory database for testing
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");

        // First, store a file with known content
        let content = b"This is a test file for duplicate detection";
        let file_name = "original.txt";
        let user_id = "test_user";

-       let config = test_config("./data");
+       let test_storage = TestStorageManager::new_memory()
+           .await
+           .expect("create test storage manager");

        let field_data1 = create_test_file(content, file_name);
-       let original_file_info = FileInfo::new(field_data1, &db, user_id, &config)
-           .await
-           .expect("Failed to create original file");
+       let original_file_info =
+           FileInfo::new_with_storage(field_data1, &db, user_id, test_storage.storage())
+               .await
+               .expect("Failed to create original file");

        // Now try to store another file with the same content but different name
        let duplicate_name = "duplicate.txt";
        let field_data2 = create_test_file(content, duplicate_name);

        // The system should detect it's the same file and return the original FileInfo
-       let duplicate_file_info = FileInfo::new(field_data2, &db, user_id, &config)
-           .await
-           .expect("Failed to process duplicate file");
+       let duplicate_file_info =
+           FileInfo::new_with_storage(field_data2, &db, user_id, test_storage.storage())
+               .await
+               .expect("Failed to process duplicate file");

        // The returned FileInfo should match the original
        assert_eq!(duplicate_file_info.id, original_file_info.id);
@@ -553,7 +638,6 @@ mod tests {

    #[tokio::test]
    async fn test_get_by_sha_not_found() {
        // Setup in-memory database for testing
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
@@ -574,7 +658,6 @@ mod tests {

    #[tokio::test]
    async fn test_manual_file_info_creation() {
        // Setup in-memory database for testing
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
@@ -615,23 +698,28 @@ mod tests {

    #[tokio::test]
    async fn test_delete_by_id() {
        // Setup in-memory database for testing
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");

-       // Create and persist a test file via FileInfo::new
+       // Create and persist a test file via FileInfo::new_with_storage
        let user_id = "user123";
-       let cfg = test_config("./data");
+       let test_storage = TestStorageManager::new_memory()
+           .await
+           .expect("create test storage manager");
        let temp = create_test_file(b"test content", "test_file.txt")
-       let file_info = FileInfo::new(temp, &db, user_id, &cfg)
+       let file_info = FileInfo::new_with_storage(temp, &db, user_id, test_storage.storage())
            .await
            .expect("create file");

-       // Delete the file
-       let delete_result = FileInfo::delete_by_id(&file_info.id, &db, &cfg).await;
+       // Delete the file using StorageManager
+       let delete_result =
+           FileInfo::delete_by_id_with_storage(&file_info.id, &db, test_storage.storage()).await;

        // Delete should be successful
        assert!(
@@ -650,13 +738,12 @@ mod tests {
            "FileInfo should be deleted from the database"
        );

-       // Verify content no longer retrievable
-       assert!(store::get_bytes_at(&file_info.path, &cfg).await.is_err());
+       // Verify content no longer retrievable from storage
+       assert!(test_storage.storage().get(&file_info.path).await.is_err());
    }

    #[tokio::test]
    async fn test_delete_by_id_not_found() {
        // Setup in-memory database for testing
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
@@ -664,19 +751,16 @@ mod tests {
            .expect("Failed to start in-memory surrealdb");

        // Try to delete a file that doesn't exist
-       let result = FileInfo::delete_by_id(
-           "nonexistent_id",
-           &db,
-           &test_config("./data"),
-       )
-       .await;
+       let test_storage = TestStorageManager::new_memory().await.unwrap();
+       let result =
+           FileInfo::delete_by_id_with_storage("nonexistent_id", &db, test_storage.storage())
+               .await;

        // Should succeed even if the file record does not exist
        assert!(result.is_ok());
    }
    #[tokio::test]
    async fn test_get_by_id() {
        // Setup in-memory database for testing
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
@@ -717,7 +801,6 @@ mod tests {

    #[tokio::test]
    async fn test_get_by_id_not_found() {
        // Setup in-memory database for testing
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
@@ -741,43 +824,197 @@ mod tests {
        }
    }

+   // StorageManager-based tests
    #[tokio::test]
-   async fn test_fileinfo_persist_with_custom_root() {
-       // Setup in-memory database for testing
-       let namespace = "test_ns";
-       let database = &Uuid::new_v4().to_string();
-       let db = SurrealDbClient::memory(namespace, database)
+   async fn test_file_info_new_with_storage_memory() {
+       // Setup
+       let db = SurrealDbClient::memory("test_ns", "test_file_storage_memory")
            .await
-           .expect("Failed to start in-memory surrealdb");
+           .unwrap();
        db.apply_migrations().await.unwrap();

-       // Create a test file
-       let content = b"This is a test file for data directory configuration";
-       let file_name = "data_dir_test.txt";
-       let field_data = create_test_file(content, file_name);
-
-       // Create a FileInfo instance with a custom data directory
+       let content = b"This is a test file for StorageManager";
+       let field_data = create_test_file(content, "test_storage.txt");
        let user_id = "test_user";
-       let custom_data_dir = "/tmp/minne_custom_data_dir";
-       let config = test_config(custom_data_dir);

-       // Test file creation
-       let file_info = FileInfo::new(field_data, &db, user_id, &config)
+       // Create test storage manager
+       let storage = store::testing::TestStorageManager::new_memory()
            .await
-           .expect("Failed to create file in custom data directory");
+           .unwrap();

-       // Verify the file has the correct content via object_store
-       let file_content = store::get_bytes_at(&file_info.path, &config)
+       // Test file creation with StorageManager
+       let file_info = FileInfo::new_with_storage(field_data, &db, user_id, storage.storage())
            .await
-           .expect("Failed to read file content");
-       assert_eq!(file_content.as_ref(), content);
+           .expect("Failed to create file with StorageManager");

-       // Test file deletion
-       FileInfo::delete_by_id(&file_info.id, &db, &config)
+       // Verify the file was created correctly
+       assert_eq!(file_info.user_id, user_id);
+       assert_eq!(file_info.file_name, "test_storage.txt");
+       assert!(!file_info.sha256.is_empty());
+       assert!(!file_info.path.is_empty());
+
+       // Test content retrieval with StorageManager
+       let retrieved_content = file_info
+           .get_content_with_storage(storage.storage())
            .await
-           .expect("Failed to delete file");
-       assert!(store::get_bytes_at(&file_info.path, &config).await.is_err());
+           .expect("Failed to get file content with StorageManager");
+       assert_eq!(retrieved_content.as_ref(), content);

-       // Clean up the test directory
-       let _ = tokio::fs::remove_dir_all(custom_data_dir).await;
+       // Test file deletion with StorageManager
+       FileInfo::delete_by_id_with_storage(&file_info.id, &db, storage.storage())
+           .await
+           .expect("Failed to delete file with StorageManager");
+
+       // Verify file is deleted
+       let deleted_content_result = file_info.get_content_with_storage(storage.storage()).await;
+       assert!(deleted_content_result.is_err());
    }

    #[tokio::test]
    async fn test_file_info_new_with_storage_local() {
        // Setup
        let db = SurrealDbClient::memory("test_ns", "test_file_storage_local")
            .await
            .unwrap();
        db.apply_migrations().await.unwrap();

        let content = b"This is a test file for StorageManager with local storage";
        let field_data = create_test_file(content, "test_local.txt");
        let user_id = "test_user";

        // Create test storage manager with local backend
        let storage = store::testing::TestStorageManager::new_local()
            .await
            .unwrap();

        // Test file creation with StorageManager
        let file_info = FileInfo::new_with_storage(field_data, &db, user_id, storage.storage())
            .await
            .expect("Failed to create file with StorageManager");

        // Verify the file was created correctly
        assert_eq!(file_info.user_id, user_id);
        assert_eq!(file_info.file_name, "test_local.txt");
        assert!(!file_info.sha256.is_empty());
        assert!(!file_info.path.is_empty());

        // Test content retrieval with StorageManager
        let retrieved_content = file_info
            .get_content_with_storage(storage.storage())
            .await
            .expect("Failed to get file content with StorageManager");
        assert_eq!(retrieved_content.as_ref(), content);

        // Test file deletion with StorageManager
        FileInfo::delete_by_id_with_storage(&file_info.id, &db, storage.storage())
            .await
            .expect("Failed to delete file with StorageManager");

        // Verify file is deleted
        let deleted_content_result = file_info.get_content_with_storage(storage.storage()).await;
        assert!(deleted_content_result.is_err());
    }

    #[tokio::test]
    async fn test_file_info_storage_manager_persistence() {
        // Setup
        let db = SurrealDbClient::memory("test_ns", "test_file_persistence")
            .await
            .unwrap();
        db.apply_migrations().await.unwrap();

        let content = b"Test content for persistence";
        let field_data = create_test_file(content, "persistence_test.txt");
        let user_id = "test_user";

        // Create test storage manager
        let storage = store::testing::TestStorageManager::new_memory()
            .await
            .unwrap();

        // Create file
        let file_info = FileInfo::new_with_storage(field_data, &db, user_id, storage.storage())
            .await
            .expect("Failed to create file");

        // Test that data persists across multiple operations with the same StorageManager
        let retrieved_content_1 = file_info
            .get_content_with_storage(storage.storage())
            .await
            .unwrap();
        let retrieved_content_2 = file_info
            .get_content_with_storage(storage.storage())
            .await
            .unwrap();

        assert_eq!(retrieved_content_1.as_ref(), content);
        assert_eq!(retrieved_content_2.as_ref(), content);

        // Test that different StorageManager instances don't share data (memory storage isolation)
        let storage2 = store::testing::TestStorageManager::new_memory()
            .await
            .unwrap();
        let isolated_content_result = file_info.get_content_with_storage(storage2.storage()).await;
        assert!(
            isolated_content_result.is_err(),
            "Different StorageManager should not have access to same data"
        );
    }

    #[tokio::test]
    async fn test_file_info_storage_manager_equivalence() {
        // Setup
        let db = SurrealDbClient::memory("test_ns", "test_file_equivalence")
            .await
            .unwrap();
        db.apply_migrations().await.unwrap();

        let content = b"Test content for equivalence testing";
        let field_data1 = create_test_file(content, "equivalence_test_1.txt");
        let field_data2 = create_test_file(content, "equivalence_test_2.txt");
        let user_id = "test_user";

        // Create single storage manager and reuse it
        let storage_manager = store::testing::TestStorageManager::new_memory()
            .await
            .unwrap();
        let storage = storage_manager.storage();

        // Create multiple files with the same storage manager
        let file_info_1 = FileInfo::new_with_storage(field_data1, &db, user_id, &storage)
            .await
            .expect("Failed to create file 1");

        let file_info_2 = FileInfo::new_with_storage(field_data2, &db, user_id, &storage)
            .await
            .expect("Failed to create file 2");

        // Test that both files can be retrieved with the same storage backend
        let content_1 = file_info_1
            .get_content_with_storage(&storage)
            .await
            .unwrap();
        let content_2 = file_info_2
            .get_content_with_storage(&storage)
            .await
            .unwrap();

        assert_eq!(content_1.as_ref(), content);
        assert_eq!(content_2.as_ref(), content);

        // Test that files can be deleted with the same storage manager
        FileInfo::delete_by_id_with_storage(&file_info_1.id, &db, &storage)
            .await
            .unwrap();
        FileInfo::delete_by_id_with_storage(&file_info_2.id, &db, &storage)
            .await
            .unwrap();

        // Verify files are deleted
        let deleted_content_1 = file_info_1.get_content_with_storage(&storage).await;
        let deleted_content_2 = file_info_2.get_content_with_storage(&storage).await;

        assert!(deleted_content_1.is_err());
        assert!(deleted_content_2.is_err());
    }
}

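The isolation assertion in the persistence test rests on a property of the backing `object_store` crate, already a dependency per the imports earlier in this file: separate `InMemory` instances share nothing. A minimal standalone sketch of that behavior (API as in recent `object_store` releases, where `put` takes a `PutPayload` convertible from `Bytes` — an assumption about the pinned version):

use object_store::{memory::InMemory, path::Path as ObjPath, ObjectStore};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let a = InMemory::new();
    let b = InMemory::new();
    let path = ObjPath::from("files/demo.txt");

    // Write through store `a` only.
    a.put(&path, bytes::Bytes::from_static(b"hello").into()).await?;

    assert!(a.get(&path).await.is_ok());
    assert!(b.get(&path).await.is_err()); // store `b` never saw the write
    Ok(())
}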
@@ -459,4 +459,44 @@ mod tests {
        let retrieved: Option<Scratchpad> = db.get_item(&scratchpad_id).await.unwrap();
        assert!(retrieved.is_some());
    }

    #[tokio::test]
    async fn test_timezone_aware_scratchpad_conversion() {
        let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string())
            .await
            .expect("Failed to create test database");

        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");

        let user_id = "test_user_123";
        let scratchpad =
            Scratchpad::new(user_id.to_string(), "Test Timezone Scratchpad".to_string());
        let scratchpad_id = scratchpad.id.clone();

        db.store_item(scratchpad).await.unwrap();

        let retrieved = Scratchpad::get_by_id(&scratchpad_id, user_id, &db)
            .await
            .unwrap();

        // Test that datetime fields are preserved and can be used for timezone formatting
        assert!(retrieved.created_at.timestamp() > 0);
        assert!(retrieved.updated_at.timestamp() > 0);
        assert!(retrieved.last_saved_at.timestamp() > 0);

        // Test that optional datetime fields work correctly
        assert!(retrieved.archived_at.is_none());
        assert!(retrieved.ingested_at.is_none());

        // Archive the scratchpad to test optional datetime handling
        let archived = Scratchpad::archive(&scratchpad_id, user_id, &db, false)
            .await
            .unwrap();

        assert!(archived.archived_at.is_some());
        assert!(archived.archived_at.unwrap().timestamp() > 0);
        assert!(archived.ingested_at.is_none());
    }
}

@@ -2,10 +2,11 @@ use config::{Config, ConfigError, Environment, File};
use serde::Deserialize;
use std::env;

-#[derive(Clone, Deserialize, Debug)]
+#[derive(Clone, Deserialize, Debug, PartialEq)]
+#[serde(rename_all = "lowercase")]
pub enum StorageKind {
    Local,
    Memory,
}

fn default_storage_kind() -> StorageKind {

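With `PartialEq` on the enum and the lowercase rename, the selected backend can be both parsed from lowercase config values and compared directly. A small sketch of what those two derives buy — `serde_json` stands in for the `config` crate here, purely for illustration:

use serde::Deserialize;

#[derive(Clone, Deserialize, Debug, PartialEq)]
#[serde(rename_all = "lowercase")]
enum StorageKind {
    Local,
    Memory,
}

fn main() {
    // "memory" in a config file maps onto StorageKind::Memory...
    let kind: StorageKind = serde_json::from_str(r#""memory""#).unwrap();
    // ...and PartialEq allows branching on it with == instead of a match.
    assert_eq!(kind, StorageKind::Memory);
}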
@@ -26,6 +26,7 @@
        src = ./.;
        filter = let
          extraPaths = [
            (toString ./Cargo.lock)
            (toString ./common/migrations)
            (toString ./common/schemas)
            (toString ./html-router/templates)

@@ -419,6 +419,10 @@
document.addEventListener('DOMContentLoaded', () => tryRender(document));

// HTMX partial swaps
+document.body.addEventListener('knowledge-graph-refresh', () => {
+  tryRender(document);
+});
+
document.body.addEventListener('htmx:afterSettle', (evt) => {
  tryRender(evt && evt.target ? evt.target : document);
});

File diff suppressed because one or more lines are too long
@@ -1,4 +1,4 @@
-use common::storage::db::SurrealDbClient;
+use common::storage::{db::SurrealDbClient, store::StorageManager};
use common::utils::template_engine::{ProvidesTemplateEngine, TemplateEngine};
use common::{create_template_engine, storage::db::ProvidesDb, utils::config::AppConfig};
use composite_retrieval::reranking::RerankerPool;
@@ -14,14 +14,16 @@ pub struct HtmlState {
    pub templates: Arc<TemplateEngine>,
    pub session_store: Arc<SessionStoreType>,
    pub config: AppConfig,
+   pub storage: StorageManager,
    pub reranker_pool: Option<Arc<RerankerPool>>,
}

impl HtmlState {
-   pub fn new_with_resources(
+   pub async fn new_with_resources(
        db: Arc<SurrealDbClient>,
        openai_client: Arc<OpenAIClientType>,
        session_store: Arc<SessionStoreType>,
+       storage: StorageManager,
        config: AppConfig,
        reranker_pool: Option<Arc<RerankerPool>>,
    ) -> Result<Self, Box<dyn std::error::Error>> {
@@ -34,6 +36,7 @@ impl HtmlState {
            session_store,
            templates: Arc::new(template_engine),
            config,
+           storage,
            reranker_pool,
        })
    }

@@ -1,5 +1,9 @@
use async_openai::types::ListModelResponse;
-use axum::{extract::State, response::IntoResponse, Form};
+use axum::{
+    extract::{Query, State},
+    response::IntoResponse,
+    Form,
+};
use serde::{Deserialize, Serialize};

use common::{
@@ -31,44 +35,83 @@ use crate::{
pub struct AdminPanelData {
    user: User,
    settings: SystemSettings,
-   analytics: Analytics,
-   users: i64,
+   analytics: Option<Analytics>,
+   users: Option<i64>,
    default_query_prompt: String,
    default_image_prompt: String,
    conversation_archive: Vec<Conversation>,
-   available_models: ListModelResponse,
+   available_models: Option<ListModelResponse>,
+   current_section: AdminSection,
}

+#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum AdminSection {
+    Overview,
+    Models,
+}
+
+impl Default for AdminSection {
+    fn default() -> Self {
+        Self::Overview
+    }
+}
+
+#[derive(Deserialize)]
+pub struct AdminPanelQuery {
+    section: Option<String>,
+}
+
pub async fn show_admin_panel(
    State(state): State<HtmlState>,
    RequireUser(user): RequireUser,
+   Query(query): Query<AdminPanelQuery>,
) -> Result<impl IntoResponse, HtmlError> {
-   let (
-       settings_res,
-       analytics_res,
-       user_count_res,
-       conversation_archive_res,
-       available_models_res,
-   ) = tokio::join!(
-       SystemSettings::get_current(&state.db),
-       Analytics::get_current(&state.db),
-       Analytics::get_users_amount(&state.db),
-       User::get_user_conversations(&user.id, &state.db),
-       async { state.openai_client.models().list().await }
-   );
+   let section = match query.section.as_deref() {
+       Some("models") => AdminSection::Models,
+       _ => AdminSection::Overview,
+   };
+
+   let (settings, conversation_archive) = tokio::try_join!(
+       SystemSettings::get_current(&state.db),
+       User::get_user_conversations(&user.id, &state.db)
+   )?;
+
+   let (analytics, users) = if section == AdminSection::Overview {
+       let (analytics, users) = tokio::try_join!(
+           Analytics::get_current(&state.db),
+           Analytics::get_users_amount(&state.db)
+       )?;
+       (Some(analytics), Some(users))
+   } else {
+       (None, None)
+   };
+
+   let available_models = if section == AdminSection::Models {
+       Some(
+           state
+               .openai_client
+               .models()
+               .list()
+               .await
+               .map_err(|e| AppError::InternalError(e.to_string()))?,
+       )
+   } else {
+       None
+   };

    Ok(TemplateResponse::new_template(
        "admin/base.html",
        AdminPanelData {
            user,
-           settings: settings_res?,
-           analytics: analytics_res?,
-           available_models: available_models_res
-               .map_err(|e| AppError::InternalError(e.to_string()))?,
-           users: user_count_res?,
+           settings,
+           analytics,
+           available_models,
+           users,
            default_query_prompt: DEFAULT_QUERY_SYSTEM_PROMPT.to_string(),
            default_image_prompt: DEFAULT_IMAGE_PROCESSING_PROMPT.to_string(),
-           conversation_archive: conversation_archive_res?,
+           conversation_archive,
+           current_section: section,
        },
    ))
}
@@ -115,7 +158,7 @@ pub async fn toggle_registration_status(
    SystemSettings::update(&state.db, new_settings.clone()).await?;

    Ok(TemplateResponse::new_partial(
-       "admin/base.html",
+       "admin/sections/overview.html",
        "registration_status_input",
        RegistrationToggleData {
            settings: new_settings,
@@ -217,7 +260,7 @@ pub async fn update_model_settings(
        .map_err(|_e| AppError::InternalError("Failed to get models".to_string()))?;

    Ok(TemplateResponse::new_partial(
-       "admin/base.html",
+       "admin/sections/models.html",
        "model_settings_form",
        ModelSettingsData {
            settings: new_settings,
@@ -282,7 +325,7 @@ pub async fn patch_query_prompt(
    SystemSettings::update(&state.db, new_settings.clone()).await?;

    Ok(TemplateResponse::new_partial(
-       "admin/base.html",
+       "admin/sections/overview.html",
        "system_prompt_section",
        SystemPromptSectionData {
            settings: new_settings,
@@ -341,7 +384,7 @@ pub async fn patch_ingestion_prompt(
    SystemSettings::update(&state.db, new_settings.clone()).await?;

    Ok(TemplateResponse::new_partial(
-       "admin/base.html",
+       "admin/sections/overview.html",
        "system_prompt_section",
        SystemPromptSectionData {
            settings: new_settings,
@@ -400,7 +443,7 @@ pub async fn patch_image_prompt(
    SystemSettings::update(&state.db, new_settings.clone()).await?;

    Ok(TemplateResponse::new_partial(
-       "admin/base.html",
+       "admin/sections/overview.html",
        "system_prompt_section",
        SystemPromptSectionData {
            settings: new_settings,

@@ -190,7 +190,7 @@ pub async fn delete_text_content(
    TextContent::has_other_with_file(&file_info.id, &text_content.id, &state.db).await?;

    if !file_in_use {
-       FileInfo::delete_by_id(&file_info.id, &state.db, &state.config).await?;
+       FileInfo::delete_by_id_with_storage(&file_info.id, &state.db, &state.storage).await?;
    }
}

@@ -17,7 +17,6 @@ use crate::{
    utils::text_content_preview::truncate_text_contents,
    AuthSessionType,
};
-use common::storage::store;
use common::storage::types::user::DashboardStats;
use common::{
    error::AppError,
@@ -86,7 +85,7 @@ pub async fn delete_text_content(
    TextContent::has_other_with_file(&file_info.id, &text_content.id, &state.db).await?;

    if !file_in_use {
-       FileInfo::delete_by_id(&file_info.id, &state.db, &state.config).await?;
+       FileInfo::delete_by_id_with_storage(&file_info.id, &state.db, &state.storage).await?;
    }
}

@@ -278,7 +277,7 @@ pub async fn serve_file(
        return Ok(TemplateResponse::unauthorized().into_response());
    }

-   let stream = match store::get_stream_at(&file_info.path, &state.config).await {
+   let stream = match state.storage.get_stream(&file_info.path).await {
        Ok(s) => s,
        Err(_) => return Ok(TemplateResponse::server_error().into_response()),
    };

@@ -98,7 +98,8 @@ pub async fn process_ingress_form(
    info!("{:?}", input);

    let file_infos = try_join_all(input.files.into_iter().map(|file| {
-       FileInfo::new(file, &state.db, &user.id, &state.config).map_err(AppError::from)
+       FileInfo::new_with_storage(file, &state.db, &user.id, &state.storage)
+           .map_err(AppError::from)
    }))
    .await?;

@@ -4,10 +4,11 @@ use std::fmt;

use axum::{
    extract::{Path, Query, State},
-   response::IntoResponse,
+   http::HeaderValue,
+   response::{IntoResponse, Response},
    Form, Json,
};
-use axum_htmx::{HxBoosted, HxRequest};
+use axum_htmx::{HxBoosted, HxRequest, HX_TRIGGER};
use serde::{
    de::{self, Deserializer, MapAccess, Visitor},
    Deserialize, Serialize,
@@ -38,10 +39,96 @@ use crate::{
use url::form_urlencoded;

const KNOWLEDGE_ENTITIES_PER_PAGE: usize = 12;
-const DEFAULT_RELATIONSHIP_TYPE: &str = "relates_to";
+const RELATIONSHIP_TYPE_OPTIONS: &[&str] = &["RelatedTo", "RelevantTo", "SimilarTo", "References"];
+const DEFAULT_RELATIONSHIP_TYPE: &str = RELATIONSHIP_TYPE_OPTIONS[0];
const MAX_RELATIONSHIP_SUGGESTIONS: usize = 10;
const SUGGESTION_MIN_SCORE: f32 = 0.5;

const GRAPH_REFRESH_TRIGGER: &str = r#"{"knowledge-graph-refresh":true}"#;
const RELATIONSHIP_TYPE_ALIASES: &[(&str, &str)] = &[("relatesto", "RelatedTo")];

fn relationship_type_or_default(value: Option<&str>) -> String {
    match value {
        Some(raw) => canonicalize_relationship_type(raw),
        None => DEFAULT_RELATIONSHIP_TYPE.to_string(),
    }
}

fn canonicalize_relationship_type(value: &str) -> String {
    let trimmed = value.trim();
    if trimmed.is_empty() {
        return DEFAULT_RELATIONSHIP_TYPE.to_string();
    }

    let key: String = trimmed
        .chars()
        .filter(|c| c.is_ascii_alphanumeric())
        .flat_map(|c| c.to_lowercase())
        .collect();

    for option in RELATIONSHIP_TYPE_OPTIONS {
        let option_key: String = option
            .chars()
            .filter(|c| c.is_ascii_alphanumeric())
            .flat_map(|c| c.to_lowercase())
            .collect();
        if option_key == key {
            return (*option).to_string();
        }
    }

    for (alias, target) in RELATIONSHIP_TYPE_ALIASES {
        if *alias == key {
            return (*target).to_string();
        }
    }

    let mut result = String::new();
    for segment in trimmed
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
    {
        let mut chars = segment.chars();
        if let Some(first) = chars.next() {
            result.extend(first.to_uppercase());
            for ch in chars {
                result.extend(ch.to_lowercase());
            }
        }
    }

    if result.is_empty() {
        trimmed.to_string()
    } else {
        result
    }
}

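Taken together, the lookup order is: exact match against a known option on a normalized key, then the legacy-alias table, then a PascalCase fallback for free-form input. A few illustrative cases, written test-style against the function exactly as it appears above:

#[test]
fn canonicalize_relationship_type_examples() {
    // Case- and punctuation-insensitive match against a known option:
    assert_eq!(canonicalize_relationship_type("related_to"), "RelatedTo");
    // The old default "relates_to" normalizes to the aliased key "relatesto":
    assert_eq!(canonicalize_relationship_type("relates_to"), "RelatedTo");
    // Unknown types are PascalCased rather than rejected:
    assert_eq!(canonicalize_relationship_type("depends on"), "DependsOn");
    // Blank input falls back to the default:
    assert_eq!(canonicalize_relationship_type("  "), DEFAULT_RELATIONSHIP_TYPE);
}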
fn collect_relationship_type_options(relationships: &[KnowledgeRelationship]) -> Vec<String> {
    let mut options: HashSet<String> = RELATIONSHIP_TYPE_OPTIONS
        .iter()
        .map(|value| (*value).to_string())
        .collect();

    for relationship in relationships {
        options.insert(canonicalize_relationship_type(
            &relationship.metadata.relationship_type,
        ));
    }

    let mut options: Vec<String> = options.into_iter().collect();
    options.sort();
    options
}

fn respond_with_graph_refresh(response: TemplateResponse) -> Response {
    let mut response = response.into_response();
    if let Ok(value) = HeaderValue::from_str(GRAPH_REFRESH_TRIGGER) {
        response.headers_mut().insert(HX_TRIGGER, value);
    }
    response
}

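This is the server half of the graph refresh wired up in the front-end script earlier in this commit set: the `HX-Trigger` response header makes HTMX dispatch a `knowledge-graph-refresh` event on `document.body`, which the new listener uses to re-render the graph after partial swaps. One test-style sketch of the invariant the helper silently depends on — if the constant ever stops being a valid header value, the header is dropped and refreshes stop without any error:

#[test]
fn graph_refresh_trigger_is_a_valid_header_value() {
    assert!(HeaderValue::from_str(GRAPH_REFRESH_TRIGGER).is_ok());
}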
#[derive(Deserialize, Default)]
pub struct FilterParams {
    entity_type: Option<String>,
@@ -59,6 +146,8 @@ pub async fn show_new_knowledge_entity_form(
        .collect();

    let existing_entities = User::get_knowledge_entities(&user.id, &state.db).await?;
+   let relationships = User::get_knowledge_relationships(&user.id, &state.db).await?;
+   let relationship_type_options = collect_relationship_type_options(&relationships);
    let empty_selected: HashSet<String> = HashSet::new();
    let empty_scores: HashMap<String, f32> = HashMap::new();
    let relationship_options =
@@ -70,9 +159,10 @@ pub async fn show_new_knowledge_entity_form(
            entity_types,
            relationship_list: RelationshipListData {
                relationship_options,
-               relationship_type: DEFAULT_RELATIONSHIP_TYPE.to_string(),
+               relationship_type: relationship_type_or_default(None),
                suggestion_count: 0,
            },
+           relationship_type_options,
        },
    ))
}
@@ -107,13 +197,7 @@ pub async fn create_knowledge_entity(

    state.db.store_item(new_entity.clone()).await?;

-   let relationship_type = form
-       .relationship_type
-       .as_deref()
-       .map(str::trim)
-       .filter(|value| !value.is_empty())
-       .unwrap_or(DEFAULT_RELATIONSHIP_TYPE)
-       .to_string();
+   let relationship_type = relationship_type_or_default(form.relationship_type.as_deref());

    debug!("form: {:?}", form);
    if !form.relationship_ids.is_empty() {
@@ -148,11 +232,11 @@ pub async fn create_knowledge_entity(

    let default_params = FilterParams::default();
    let kb_data = build_knowledge_base_data(&state, &user, &default_params).await?;
-   Ok(TemplateResponse::new_partial(
+   Ok(respond_with_graph_refresh(TemplateResponse::new_partial(
        "knowledge/base.html",
        "main",
        kb_data,
-   ))
+   )))
}

pub async fn suggest_knowledge_relationships(
@@ -225,13 +309,7 @@ pub async fn suggest_knowledge_relationships(
        }
    }

-   let relationship_type = form
-       .relationship_type
-       .as_deref()
-       .map(str::trim)
-       .filter(|value| !value.is_empty())
-       .unwrap_or(DEFAULT_RELATIONSHIP_TYPE)
-       .to_string();
+   let relationship_type = relationship_type_or_default(form.relationship_type.as_deref());

    let entities: Vec<KnowledgeEntity> = entity_lookup.into_values().collect();
    let relationship_options =
@@ -251,7 +329,7 @@ pub async fn suggest_knowledge_relationships(
pub struct KnowledgeBaseData {
    entities: Vec<KnowledgeEntity>,
    visible_entities: Vec<KnowledgeEntity>,
-   relationships: Vec<KnowledgeRelationship>,
+   relationships: Vec<RelationshipTableRow>,
    user: User,
    entity_types: Vec<String>,
    content_categories: Vec<String>,
@@ -260,6 +338,8 @@ pub struct KnowledgeBaseData {
    conversation_archive: Vec<Conversation>,
    pagination: Pagination,
    page_query: String,
+   relationship_type_options: Vec<String>,
+   default_relationship_type: String,
}

#[derive(Serialize)]
@@ -270,6 +350,12 @@ pub struct RelationshipOption {
    score: Option<f32>,
}

+#[derive(Serialize)]
+pub struct RelationshipTableRow {
+    relationship: KnowledgeRelationship,
+    relationship_type_label: String,
+}
+
fn build_relationship_options(
    entities: Vec<KnowledgeEntity>,
    selected_ids: &HashSet<String>,
@@ -309,6 +395,40 @@ fn build_relationship_options(
    options
}

fn build_relationship_table_data(
    entities: Vec<KnowledgeEntity>,
    relationships: Vec<KnowledgeRelationship>,
) -> RelationshipTableData {
    let relationship_type_options = collect_relationship_type_options(&relationships);
    let mut frequency: HashMap<String, usize> = HashMap::new();
    let relationships = relationships
        .into_iter()
        .map(|relationship| {
            let relationship_type_label =
                canonicalize_relationship_type(&relationship.metadata.relationship_type);
            *frequency
                .entry(relationship_type_label.clone())
                .or_insert(0) += 1;
            RelationshipTableRow {
                relationship,
                relationship_type_label,
            }
        })
        .collect();
    let default_relationship_type = frequency
        .into_iter()
        .max_by_key(|(_, count)| *count)
        .map(|(label, _)| label)
        .unwrap_or_else(|| DEFAULT_RELATIONSHIP_TYPE.to_string());

    RelationshipTableData {
        entities,
        relationships,
        relationship_type_options,
        default_relationship_type,
    }
}

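So the pre-selected type in the relationship table is whichever canonical label occurs most often, falling back to `RelatedTo` only when no relationships exist yet. The rule in isolation, as a test-style sketch:

use std::collections::HashMap;

#[test]
fn default_type_is_the_most_frequent_label() {
    let mut frequency: HashMap<String, usize> = HashMap::new();
    for label in ["RelatedTo", "References", "RelatedTo"] {
        *frequency.entry(label.to_string()).or_insert(0) += 1;
    }
    let default = frequency
        .into_iter()
        .max_by_key(|(_, count)| *count)
        .map(|(label, _)| label)
        .unwrap_or_else(|| "RelatedTo".to_string());
    assert_eq!(default, "RelatedTo"); // two occurrences beat one
}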
async fn build_knowledge_base_data(
    state: &HtmlState,
    user: &User,
@@ -348,10 +468,16 @@ async fn build_knowledge_base_data(

    let relationships = User::get_knowledge_relationships(&user.id, &state.db).await?;
    let entity_id_set: HashSet<String> = entities.iter().map(|e| e.id.clone()).collect();
-   let relationships: Vec<KnowledgeRelationship> = relationships
+   let filtered_relationships: Vec<KnowledgeRelationship> = relationships
        .into_iter()
        .filter(|rel| entity_id_set.contains(&rel.in_) && entity_id_set.contains(&rel.out))
        .collect();
+   let RelationshipTableData {
+       entities: _,
+       relationships,
+       relationship_type_options,
+       default_relationship_type,
+   } = build_relationship_table_data(entities.clone(), filtered_relationships);
    let conversation_archive = User::get_user_conversations(&user.id, &state.db).await?;

    Ok(KnowledgeBaseData {
@@ -366,6 +492,8 @@ async fn build_knowledge_base_data(
        conversation_archive,
        pagination,
        page_query,
+       relationship_type_options,
+       default_relationship_type,
    })
}

@@ -380,6 +508,7 @@ pub struct RelationshipListData {
pub struct NewEntityModalData {
    entity_types: Vec<String>,
    relationship_list: RelationshipListData,
+   relationship_type_options: Vec<String>,
}

#[derive(Debug)]
@@ -679,7 +808,7 @@ pub async fn get_knowledge_graph_json(
            links.push(GraphLink {
                source: rel.out.clone(),
                target: rel.in_.clone(),
-               relationship_type: rel.metadata.relationship_type.clone(),
+               relationship_type: canonicalize_relationship_type(&rel.metadata.relationship_type),
            });
        }
    }
@@ -806,7 +935,7 @@ pub async fn patch_knowledge_entity(
    let content_categories = User::get_user_categories(&user.id, &state.db).await?;

    // Render updated list
-   Ok(TemplateResponse::new_template(
+   Ok(respond_with_graph_refresh(TemplateResponse::new_template(
        "knowledge/entity_list.html",
        EntityListData {
            visible_entities,
@@ -818,7 +947,7 @@ pub async fn patch_knowledge_entity(
            selected_content_category: None,
            page_query: String::new(),
        },
-   ))
+   )))
}

pub async fn delete_knowledge_entity(
@@ -845,7 +974,7 @@ pub async fn delete_knowledge_entity(
    // Get content categories
    let content_categories = User::get_user_categories(&user.id, &state.db).await?;

-   Ok(TemplateResponse::new_template(
+   Ok(respond_with_graph_refresh(TemplateResponse::new_template(
        "knowledge/entity_list.html",
        EntityListData {
            visible_entities,
@@ -857,13 +986,15 @@ pub async fn delete_knowledge_entity(
            selected_content_category: None,
            page_query: String::new(),
        },
-   ))
+   )))
}

#[derive(Serialize)]
pub struct RelationshipTableData {
    entities: Vec<KnowledgeEntity>,
-   relationships: Vec<KnowledgeRelationship>,
+   relationships: Vec<RelationshipTableRow>,
+   relationship_type_options: Vec<String>,
+   default_relationship_type: String,
}

pub async fn delete_knowledge_relationship(
@@ -876,15 +1007,13 @@ pub async fn delete_knowledge_relationship(
    let entities = User::get_knowledge_entities(&user.id, &state.db).await?;

    let relationships = User::get_knowledge_relationships(&user.id, &state.db).await?;
+   let table_data = build_relationship_table_data(entities, relationships);

    // Render updated list
-   Ok(TemplateResponse::new_template(
+   Ok(respond_with_graph_refresh(TemplateResponse::new_template(
        "knowledge/relationship_table.html",
-       RelationshipTableData {
-           entities,
-           relationships,
-       },
-   ))
+       table_data,
+   )))
}

#[derive(Deserialize)]
@@ -900,12 +1029,13 @@ pub async fn save_knowledge_relationship(
    Form(form): Form<SaveKnowledgeRelationshipInput>,
) -> Result<impl IntoResponse, HtmlError> {
    // Construct relationship
+   let relationship_type = canonicalize_relationship_type(&form.relationship_type);
    let relationship = KnowledgeRelationship::new(
        form.in_,
        form.out,
        user.id.clone(),
        "manual".into(),
-       form.relationship_type,
+       relationship_type,
    );

    relationship.store_relationship(&state.db).await?;
@@ -913,13 +1043,11 @@ pub async fn save_knowledge_relationship(
    let entities = User::get_knowledge_entities(&user.id, &state.db).await?;

    let relationships = User::get_knowledge_relationships(&user.id, &state.db).await?;
+   let table_data = build_relationship_table_data(entities, relationships);

    // Render updated list
-   Ok(TemplateResponse::new_template(
+   Ok(respond_with_graph_refresh(TemplateResponse::new_template(
        "knowledge/relationship_table.html",
-       RelationshipTableData {
-           entities,
-           relationships,
-       },
-   ))
+       table_data,
+   )))
}

@@ -5,6 +5,7 @@ use axum::{
    Form,
};
use axum_htmx::{HxBoosted, HxRequest, HX_TRIGGER};
+use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

use crate::html_state::HtmlState;
@@ -32,7 +33,7 @@ pub struct ScratchpadListItem {
    id: String,
    title: String,
    content: String,
-   last_saved_at: String,
+   last_saved_at: DateTime<Utc>,
}

#[derive(Serialize)]
@@ -46,9 +47,9 @@ pub struct ScratchpadDetailData {
pub struct ScratchpadArchiveItem {
    id: String,
    title: String,
-   archived_at: String,
+   archived_at: Option<DateTime<Utc>>,
    #[serde(skip_serializing_if = "Option::is_none")]
-   ingested_at: Option<String>,
+   ingested_at: Option<DateTime<Utc>>,
}

#[derive(Serialize)]
@@ -56,9 +57,9 @@ pub struct ScratchpadDetail {
    id: String,
    title: String,
    content: String,
-   created_at: String,
-   updated_at: String,
-   last_saved_at: String,
+   created_at: DateTime<Utc>,
+   updated_at: DateTime<Utc>,
+   last_saved_at: DateTime<Utc>,
    is_dirty: bool,
}

@@ -75,7 +76,7 @@ impl From<&Scratchpad> for ScratchpadListItem {
            id: value.id.clone(),
            title: value.title.clone(),
            content: value.content.clone(),
-           last_saved_at: value.last_saved_at.format("%Y-%m-%d %H:%M").to_string(),
+           last_saved_at: value.last_saved_at,
        }
    }
}
@@ -85,13 +86,8 @@ impl From<&Scratchpad> for ScratchpadArchiveItem {
        Self {
            id: value.id.clone(),
            title: value.title.clone(),
-           archived_at: value
-               .archived_at
-               .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
-               .unwrap_or_else(|| "Unknown".to_string()),
-           ingested_at: value
-               .ingested_at
-               .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string()),
+           archived_at: value.archived_at,
+           ingested_at: value.ingested_at,
        }
    }
}
@@ -102,9 +98,9 @@ impl From<&Scratchpad> for ScratchpadDetail {
            id: value.id.clone(),
            title: value.title.clone(),
            content: value.content.clone(),
-           created_at: value.created_at.format("%Y-%m-%d %H:%M:%S").to_string(),
-           updated_at: value.updated_at.format("%Y-%m-%d %H:%M:%S").to_string(),
-           last_saved_at: value.last_saved_at.format("%Y-%m-%d %H:%M:%S").to_string(),
+           created_at: value.created_at,
+           updated_at: value.updated_at,
+           last_saved_at: value.last_saved_at,
            is_dirty: value.is_dirty,
        }
    }
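Handing templates a real `DateTime<Utc>` instead of a pre-baked UTC string is what makes timezone-aware display possible: formatting can now happen per viewer. A minimal sketch of the conversion a rendering layer can perform — `chrono-tz` and the Stockholm zone are illustrative assumptions here, not dependencies confirmed by this diff:

use chrono::{DateTime, Utc};
use chrono_tz::Europe::Stockholm; // hypothetical zone choice for the example

fn localize(ts: DateTime<Utc>) -> String {
    // Convert to the viewer's zone first, then format — the inverse of the
    // old approach, which formatted in UTC before the viewer was known.
    ts.with_timezone(&Stockholm).format("%Y-%m-%d %H:%M").to_string()
}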
@@ -391,6 +387,126 @@ pub async fn ingest_scratchpad(
    Ok(response)
}

pub async fn archive_scratchpad(
    RequireUser(user): RequireUser,
    State(state): State<HtmlState>,
    Path(scratchpad_id): Path<String>,
) -> Result<impl IntoResponse, HtmlError> {
    Scratchpad::archive(&scratchpad_id, &user.id, &state.db, false).await?;

    let scratchpads = Scratchpad::get_by_user(&user.id, &state.db).await?;
    let archived_scratchpads = Scratchpad::get_archived_by_user(&user.id, &state.db).await?;
    let conversation_archive = User::get_user_conversations(&user.id, &state.db).await?;

    let scratchpad_list: Vec<ScratchpadListItem> =
        scratchpads.iter().map(ScratchpadListItem::from).collect();
    let archived_list: Vec<ScratchpadArchiveItem> = archived_scratchpads
        .iter()
        .map(ScratchpadArchiveItem::from)
        .collect();

    Ok(TemplateResponse::new_template(
        "scratchpad/base.html",
        ScratchpadPageData {
            user,
            scratchpads: scratchpad_list,
            archived_scratchpads: archived_list,
            conversation_archive,
            new_scratchpad: None,
        },
    ))
}

#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;

    #[test]
    fn test_scratchpad_list_item_conversion() {
        // Create a test scratchpad with datetime values
        let now = Utc::now();
        let mut scratchpad = common::storage::types::scratchpad::Scratchpad::new(
            "test_user".to_string(),
            "Test Scratchpad".to_string(),
        );

        // Override the timestamps with known values for testing
        scratchpad.last_saved_at = now;

        // Test conversion to ScratchpadListItem
        let list_item = ScratchpadListItem::from(&scratchpad);

        assert_eq!(list_item.id, scratchpad.id);
        assert_eq!(list_item.title, scratchpad.title);
        assert_eq!(list_item.content, scratchpad.content);
        assert_eq!(list_item.last_saved_at, scratchpad.last_saved_at);
    }

    #[test]
    fn test_scratchpad_detail_conversion() {
        // Create a test scratchpad with datetime values
        let now = Utc::now();
        let mut scratchpad = common::storage::types::scratchpad::Scratchpad::new(
            "test_user".to_string(),
            "Test Scratchpad".to_string(),
        );

        // Override the timestamps with known values for testing
        scratchpad.last_saved_at = now;

        // Test conversion to ScratchpadDetail
        let detail = ScratchpadDetail::from(&scratchpad);

        assert_eq!(detail.id, scratchpad.id);
        assert_eq!(detail.title, scratchpad.title);
        assert_eq!(detail.content, scratchpad.content);
        assert_eq!(detail.created_at, scratchpad.created_at);
        assert_eq!(detail.updated_at, scratchpad.updated_at);
        assert_eq!(detail.last_saved_at, scratchpad.last_saved_at);
        assert_eq!(detail.is_dirty, scratchpad.is_dirty);
    }

    #[test]
    fn test_scratchpad_archive_item_conversion() {
        // Create a test scratchpad with optional datetime values
        let now = Utc::now();
        let mut scratchpad = common::storage::types::scratchpad::Scratchpad::new(
            "test_user".to_string(),
            "Test Scratchpad".to_string(),
        );

        // Set optional datetime fields
        scratchpad.archived_at = Some(now);
        scratchpad.ingested_at = Some(now);

        // Test conversion to ScratchpadArchiveItem
        let archive_item = ScratchpadArchiveItem::from(&scratchpad);

        assert_eq!(archive_item.id, scratchpad.id);
        assert_eq!(archive_item.title, scratchpad.title);
        assert_eq!(archive_item.archived_at, scratchpad.archived_at);
        assert_eq!(archive_item.ingested_at, scratchpad.ingested_at);
    }

    #[test]
    fn test_scratchpad_archive_item_conversion_with_none_values() {
        // Create a test scratchpad without optional datetime values
        let scratchpad = common::storage::types::scratchpad::Scratchpad::new(
            "test_user".to_string(),
            "Test Scratchpad".to_string(),
        );

        // Test conversion to ScratchpadArchiveItem
        let archive_item = ScratchpadArchiveItem::from(&scratchpad);

        assert_eq!(archive_item.id, scratchpad.id);
        assert_eq!(archive_item.title, scratchpad.title);
        assert_eq!(archive_item.archived_at, None);
        assert_eq!(archive_item.ingested_at, None);
    }
}

pub async fn restore_scratchpad(
    RequireUser(user): RequireUser,
    State(state): State<HtmlState>,
@@ -439,52 +555,3 @@

    Ok(response)
}
-
-pub async fn archive_scratchpad(
-    RequireUser(user): RequireUser,
-    State(state): State<HtmlState>,
-    Path(scratchpad_id): Path<String>,
-) -> Result<impl IntoResponse, HtmlError> {
-    Scratchpad::archive(&scratchpad_id, &user.id, &state.db, false).await?;
-
-    let scratchpads = Scratchpad::get_by_user(&user.id, &state.db).await?;
-    let archived_scratchpads = Scratchpad::get_archived_by_user(&user.id, &state.db).await?;
-    let conversation_archive = User::get_user_conversations(&user.id, &state.db).await?;
-
-    let scratchpad_list: Vec<ScratchpadListItem> =
-        scratchpads.iter().map(ScratchpadListItem::from).collect();
-    let archived_list: Vec<ScratchpadArchiveItem> = archived_scratchpads
-        .iter()
-        .map(ScratchpadArchiveItem::from)
-        .collect();
-
-    let trigger_payload = serde_json::json!({
-        "toast": {
-            "title": "Scratchpad archived",
-            "description": "You can find it in the archive drawer below.",
-            "type": "warning"
-        }
-    });
-    let trigger_value = serde_json::to_string(&trigger_payload).unwrap_or_else(|_| {
-        r#"{"toast":{"title":"Scratchpad archived","description":"You can find it in the archive drawer below.","type":"warning"}}"#.to_string()
-    });
-
-    let template_response = TemplateResponse::new_partial(
-        "scratchpad/base.html",
-        "main",
-        ScratchpadPageData {
-            user,
-            scratchpads: scratchpad_list,
-            archived_scratchpads: archived_list,
-            conversation_archive,
-            new_scratchpad: None,
-        },
-    );
-
-    let mut response = template_response.into_response();
-    if let Ok(header_value) = HeaderValue::from_str(&trigger_value) {
-        response.headers_mut().insert(HX_TRIGGER, header_value);
-    }
-
-    Ok(response)
-}

@@ -3,154 +3,49 @@
|
||||
{% block title %}Minne - Admin{% endblock %}
|
||||
|
||||
{% block main %}
|
||||
<div class="flex justify-center grow mt-2 sm:mt-4 pb-4">
|
||||
<div class="container">
|
||||
<section class="mb-4">
|
||||
<div class="nb-panel p-3 flex items-center justify-between">
|
||||
<h1 class="text-xl font-extrabold tracking-tight">Admin Dashboard</h1>
|
||||
<div id="admin-shell" class="flex justify-center grow mt-2 sm:mt-4 pb-4">
|
||||
<div class="container flex flex-col gap-4">
|
||||
<section class="nb-panel p-4 sm:p-5 flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between">
|
||||
<div>
|
||||
<h1 class="text-xl font-extrabold tracking-tight">Admin Controls</h1>
|
||||
<p class="text-sm opacity-70 max-w-2xl">
|
||||
Stay on top of analytics and manage AI integrations without waiting on long-running model calls.
|
||||
</p>
|
||||
</div>
|
||||
<div class="text-xs opacity-60 sm:text-right">
|
||||
Signed in as <span class="font-medium">{{ user.email }}</span>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="mb-4">
|
||||
<div class="grid grid-cols-1 sm:grid-cols-3 gap-4">
|
||||
<div class="nb-stat">
|
||||
<div class="text-xs opacity-70">Page Loads</div>
|
||||
<div class="text-3xl font-extrabold">{{analytics.page_loads}}</div>
|
||||
<div class="text-xs opacity-60">Total page load events</div>
|
||||
</div>
|
||||
<div class="nb-stat">
|
||||
<div class="text-xs opacity-70">Unique Visitors</div>
|
||||
<div class="text-3xl font-extrabold">{{analytics.visitors}}</div>
|
||||
<div class="text-xs opacity-60">Distinct users by fingerprint</div>
|
||||
</div>
|
||||
<div class="nb-stat">
|
||||
<div class="text-xs opacity-70">Users</div>
|
||||
<div class="text-3xl font-extrabold">{{users}}</div>
|
||||
<div class="text-xs opacity-60">Registered accounts</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
<nav
|
||||
class="nb-panel p-2 flex flex-wrap gap-2 text-sm"
|
||||
hx-boost="true"
|
||||
hx-target="#admin-shell"
|
||||
hx-select="#admin-shell"
|
||||
hx-swap="outerHTML"
|
||||
hx-push-url="true"
|
||||
>
|
||||
<a
|
||||
href="/admin?section=overview"
|
||||
class="nb-btn btn-sm px-4 {% if current_section == 'overview' %}nb-cta{% else %}btn-ghost{% endif %}"
|
||||
>
|
||||
Overview
|
||||
</a>
|
||||
<a
|
||||
href="/admin?section=models"
|
||||
class="nb-btn btn-sm px-4 {% if current_section == 'models' %}nb-cta{% else %}btn-ghost{% endif %}"
|
||||
>
|
||||
Models
|
||||
</a>
|
||||
</nav>
|
||||
|
||||
<section class="grid grid-cols-1 xl:grid-cols-2 gap-4">
|
||||
{% block system_prompt_section %}
|
||||
<div id="system_prompt_section" class="nb-panel p-4">
|
||||
<div class="text-sm font-semibold mb-3">System Prompts</div>
|
||||
<div class="flex gap-2 flex-col sm:flex-row">
|
||||
<button type="button" class="nb-btn btn-sm" hx-get="/edit-query-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Query Prompt</button>
|
||||
<button type="button" class="nb-btn btn-sm" hx-get="/edit-ingestion-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Ingestion Prompt</button>
|
||||
<button type="button" class="nb-btn btn-sm" hx-get="/edit-image-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Image Prompt</button>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
<div class="nb-panel p-4">
|
||||
<div class="text-sm font-semibold mb-3">AI Models</div>
|
||||
{% block model_settings_form %}
|
||||
<form hx-patch="/update-model-settings" hx-swap="outerHTML" class="grid grid-cols-1 gap-4">
|
||||
<!-- Query Model -->
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1">Query Model</div>
|
||||
<select name="query_model" class="nb-select w-full">
|
||||
{% for model in available_models.data %}
|
||||
<option value="{{model.id}}" {% if settings.query_model==model.id %} selected {% endif %}>{{model.id}}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{settings.query_model}}</span></p>
|
||||
</div>
|
||||
|
||||
<!-- Processing Model -->
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1">Processing Model</div>
|
||||
<select name="processing_model" class="nb-select w-full">
|
||||
{% for model in available_models.data %}
|
||||
<option value="{{model.id}}" {% if settings.processing_model==model.id %} selected {% endif %}>{{model.id}}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{settings.processing_model}}</span></p>
|
||||
</div>
|
||||
|
||||
<!-- Image Processing Model -->
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1">Image Processing Model</div>
|
||||
<select name="image_processing_model" class="nb-select w-full">
|
||||
{% for model in available_models.data %}
|
||||
<option value="{{model.id}}" {% if settings.image_processing_model==model.id %} selected {% endif %}>{{model.id}}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{settings.image_processing_model}}</span></p>
|
||||
</div>
|
||||
|
||||
<!-- Voice Processing Model -->
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1">Voice Processing Model</div>
|
||||
<select name="voice_processing_model" class="nb-select w-full">
|
||||
{% for model in available_models.data %}
|
||||
<option value="{{model.id}}" {% if settings.voice_processing_model==model.id %} selected {% endif %}>{{model.id}}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{settings.voice_processing_model}}</span></p>
|
||||
</div>
|
||||
|
||||
<!-- Embedding Model -->
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1">Embedding Model</div>
|
||||
<select name="embedding_model" class="nb-select w-full">
|
||||
{% for model in available_models.data %}
|
||||
<option value="{{model.id}}" {% if settings.embedding_model==model.id %} selected {% endif %}>{{model.id}}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{settings.embedding_model}} ({{settings.embedding_dimensions}} dims)</span></p>
|
||||
</div>
|
||||
|
||||
<!-- Embedding Dimensions -->
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1" for="embedding_dimensions">Embedding Dimensions</div>
|
||||
<input type="number" id="embedding_dimensions" name="embedding_dimensions" class="nb-input w-full" value="{{ settings.embedding_dimensions }}" required />
|
||||
</div>
|
||||

<!-- Alert -->
<div id="embedding-change-alert" class="nb-panel p-3 bg-warning/20 hidden">
<div class="text-sm"><strong>Warning:</strong> Changing dimensions will require re-creating all embeddings. Look up your model's required dimensions or use a model that allows specifying them.</div>
</div>

<div class="flex justify-end">
<button type="submit" class="nb-btn nb-cta btn-sm">Save Model Settings</button>
</div>
</form>

<script>
// Rebind after HTMX swaps
(() => {
const dimensionInput = document.getElementById('embedding_dimensions');
const alertElement = document.getElementById('embedding-change-alert');
const initialDimensions = '{{ settings.embedding_dimensions }}';
if (dimensionInput && alertElement) {
dimensionInput.addEventListener('input', (event) => {
if (String(event.target.value) !== String(initialDimensions)) {
alertElement.classList.remove('hidden');
} else {
alertElement.classList.add('hidden');
}
});
}
})();
</script>
{% endblock %}
</div>

<div class="nb-panel p-4">
<div class="text-sm font-semibold mb-3">Registration</div>
<label class="flex items-center gap-3">
{% block registration_status_input %}
<form hx-patch="/toggle-registrations" hx-swap="outerHTML" hx-trigger="change">
<input name="registration_open" type="checkbox" class="nb-checkbox" {% if settings.registrations_enabled %}checked{% endif %} />
</form>
{% endblock %}
<span class="text-sm">Enable Registrations</span>
</label>
<div id="registration-status" class="text-xs opacity-70 mt-2"></div>
</div>
</section>
<div id="admin-content" class="flex flex-col gap-4">
{% if current_section == 'models' %}
{% include 'admin/sections/models.html' %}
{% else %}
{% include 'admin/sections/overview.html' %}
{% endif %}
</div>
</div>
</div>
{% endblock %}

130
html-router/templates/admin/sections/models.html
Normal file
@@ -0,0 +1,130 @@
<section class="nb-panel p-4 sm:p-5 flex flex-col gap-4">
|
||||
<div class="flex items-start justify-between flex-col sm:flex-row gap-3">
|
||||
<div>
|
||||
<div class="text-sm uppercase tracking-wide opacity-60 mb-1">AI Models</div>
|
||||
<h2 class="text-lg font-semibold">Model configuration</h2>
|
||||
<p class="text-xs opacity-70 max-w-2xl">
|
||||
Choose which models power conversational search, ingestion analysis, and embeddings. Adjusting embeddings may trigger a full reprocess.
|
||||
</p>
|
||||
</div>
|
||||
<a
|
||||
href="/admin?section=overview"
|
||||
class="nb-btn btn-sm btn-ghost"
|
||||
hx-boost="true"
|
||||
hx-target="#admin-shell"
|
||||
hx-select="#admin-shell"
|
||||
hx-swap="outerHTML"
|
||||
hx-push-url="true"
|
||||
>
|
||||
← Back to Admin
|
||||
</a>
|
||||
</div>
|
||||
|
||||
{% if available_models %}
|
||||
{% block model_settings_form %}
|
||||
<form hx-patch="/update-model-settings" hx-swap="outerHTML" class="grid grid-cols-1 gap-4">
|
||||
<div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1">Query Model</div>
|
||||
<select name="query_model" class="nb-select w-full">
|
||||
{% for model in available_models.data %}
|
||||
<option value="{{ model.id }}" {% if settings.query_model == model.id %}selected{% endif %}>{{ model.id }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{ settings.query_model }}</span></p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1">Processing Model</div>
|
||||
<select name="processing_model" class="nb-select w-full">
|
||||
{% for model in available_models.data %}
|
||||
<option value="{{ model.id }}" {% if settings.processing_model == model.id %}selected{% endif %}>{{ model.id }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{ settings.processing_model }}</span></p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1">Image Processing Model</div>
|
||||
<select name="image_processing_model" class="nb-select w-full">
|
||||
{% for model in available_models.data %}
|
||||
<option value="{{ model.id }}" {% if settings.image_processing_model == model.id %}selected{% endif %}>{{ model.id }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{ settings.image_processing_model }}</span></p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1">Voice Processing Model</div>
|
||||
<select name="voice_processing_model" class="nb-select w-full">
|
||||
{% for model in available_models.data %}
|
||||
<option value="{{ model.id }}" {% if settings.voice_processing_model == model.id %}selected{% endif %}>{{ model.id }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{ settings.voice_processing_model }}</span></p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1">Embedding Model</div>
|
||||
<select name="embedding_model" class="nb-select w-full">
|
||||
{% for model in available_models.data %}
|
||||
<option value="{{ model.id }}" {% if settings.embedding_model == model.id %}selected{% endif %}>{{ model.id }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{ settings.embedding_model }}</span></p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="text-sm opacity-80 mb-1" for="embedding_dimensions">Embedding Dimensions</div>
|
||||
<input
type="number"
id="embedding_dimensions"
name="embedding_dimensions"
class="nb-input w-full"
value="{{ settings.embedding_dimensions }}"
required
min="1"
/>
<p class="text-xs opacity-70 mt-1">Changing dimensions will trigger a background re-embedding.</p>
</div>
</div>

<div id="embedding-change-alert" class="nb-panel p-3 bg-warning/20 hidden">
<div class="text-sm">
<strong>Warning:</strong> Changing dimensions recreates embeddings for text chunks and knowledge entities. Confirm the target model requires the new value.
</div>
</div>

<div class="flex justify-end gap-2">
<button type="submit" class="nb-btn nb-cta btn-sm">Save Model Settings</button>
</div>
</form>

<script>
(() => {
const dimensionInput = document.getElementById('embedding_dimensions');
const alertElement = document.getElementById('embedding-change-alert');
const initialDimensions = '{{ settings.embedding_dimensions }}';
if (dimensionInput && alertElement) {
dimensionInput.addEventListener('input', (event) => {
if (String(event.target.value) !== String(initialDimensions)) {
alertElement.classList.remove('hidden');
} else {
alertElement.classList.add('hidden');
}
});
}
})();
</script>
{% endblock %}
{% else %}
<div class="nb-panel p-4 bg-warning/10 border border-warning/40">
<div class="text-sm font-semibold mb-1">Unable to load models</div>
<p class="text-xs opacity-70">We could not reach the model provider. Check the API key and retry.</p>
</div>
{% endif %}
</section>
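
The form above PATCHes `/update-model-settings` and lets HTMX swap the returned fragment in place. As a rough orientation, a handler for it could look like the following sketch; the route and field names come from the template, while the state, persistence, and response fragment are assumptions, not the repo's actual handler.

```rust
// Hypothetical sketch of an axum handler for the hx-patch form above.
// Route and field names mirror the template; everything else is assumed.
use axum::{response::Html, routing::patch, Form, Router};
use serde::Deserialize;

#[derive(Deserialize)]
struct ModelSettingsForm {
    query_model: String,
    processing_model: String,
    image_processing_model: String,
    voice_processing_model: String,
    embedding_model: String,
    embedding_dimensions: u32,
}

async fn update_model_settings(Form(form): Form<ModelSettingsForm>) -> Html<String> {
    // A real handler would persist `form` to the system settings and
    // re-render the block; HTMX replaces the form (hx-swap="outerHTML").
    Html(format!(
        "<p>Saved model settings (query model: {})</p>",
        form.query_model
    ))
}

fn routes() -> Router {
    Router::new().route("/update-model-settings", patch(update_model_settings))
}
```
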
57
html-router/templates/admin/sections/overview.html
Normal file
@@ -0,0 +1,57 @@
{% if analytics %}
<section class="grid grid-cols-1 sm:grid-cols-3 gap-4">
<div class="nb-stat">
<div class="text-xs opacity-70">Page Loads</div>
<div class="text-3xl font-extrabold">{{ analytics.page_loads }}</div>
<div class="text-xs opacity-60">Total load events seen by Minne</div>
</div>
<div class="nb-stat">
<div class="text-xs opacity-70">Unique Visitors</div>
<div class="text-3xl font-extrabold">{{ analytics.visitors }}</div>
<div class="text-xs opacity-60">Distinct users by fingerprint</div>
</div>
<div class="nb-stat">
<div class="text-xs opacity-70">Users</div>
<div class="text-3xl font-extrabold">{{ users or 0 }}</div>
<div class="text-xs opacity-60">Registered accounts</div>
</div>
</section>
{% else %}
<section class="nb-panel p-4">
<div class="text-sm font-semibold mb-2">Analytics unavailable</div>
<p class="text-xs opacity-70">We could not fetch analytics for this view. Reload or check the monitoring pipeline.</p>
</section>
{% endif %}

<section class="grid grid-cols-1 xl:grid-cols-2 gap-4">
{% block system_prompt_section %}
<div id="system_prompt_section" class="nb-panel p-4">
<div class="flex items-start justify-between gap-2 mb-3">
<div>
<div class="text-sm font-semibold">System Prompts</div>
<p class="text-xs opacity-70">Adjust the prompts that power retrieval, ingestion analysis, and image processing flows.</p>
</div>
<span class="text-[10px] uppercase tracking-wide opacity-60">LLM</span>
</div>
<div class="flex gap-2 flex-col sm:flex-row">
<button type="button" class="nb-btn btn-sm" hx-get="/edit-query-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Query Prompt</button>
<button type="button" class="nb-btn btn-sm" hx-get="/edit-ingestion-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Ingestion Prompt</button>
<button type="button" class="nb-btn btn-sm" hx-get="/edit-image-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Image Prompt</button>
</div>
</div>
{% endblock %}

<div class="nb-panel p-4">
<div class="text-sm font-semibold mb-2">Registration</div>
<p class="text-xs opacity-60 mb-3">Toggle whether new people can sign up without an invite.</p>
<label class="flex items-center gap-3">
{% block registration_status_input %}
<form hx-patch="/toggle-registrations" hx-swap="outerHTML" hx-trigger="change">
<input name="registration_open" type="checkbox" class="nb-checkbox" {% if settings.registrations_enabled %}checked{% endif %} />
</form>
{% endblock %}
<span class="text-sm">Enable Registrations</span>
</label>
<div id="registration-status" class="text-xs opacity-70 mt-2"></div>
</div>
</section>
@@ -45,8 +45,13 @@ hx-swap="outerHTML"
<label class="flex items-center gap-2">
<span class="text-xs uppercase tracking-wide opacity-70">Type</span>
<input type="text" name="relationship_type" value="{{ relationship_list.relationship_type }}"
class="nb-input w-28" placeholder="relates_to">
class="nb-input w-32" placeholder="RelatedTo" list="relationship-type-options">
</label>
<datalist id="relationship-type-options">
{% for rel_type in relationship_type_options %}
<option value="{{ rel_type }}"></option>
{% endfor %}
</datalist>
<button type="button" class="nb-btn btn-sm nb-cta sm:ml-2" hx-post="/knowledge-entity/suggestions"
hx-target="#relationship-list" hx-swap="outerHTML" hx-include="#modal_form">
Suggest Relationships
@@ -71,4 +76,4 @@ hx-swap="outerHTML"
<button type="submit" class="nb-btn nb-cta">
Create Entity
</button>
{% endblock %}
{% endblock %}

@@ -9,7 +9,8 @@
</tr>
</thead>
<tbody>
{% for relationship in relationships %}
{% for row in relationships %}
{% set relationship = row.relationship %}
<tr>
<!-- Origin column -->
<td>
@@ -30,7 +31,7 @@
{{ relationship.out }}
{% endfor %}
</td>
<td class="uppercase tracking-wide text-xs">{{ relationship.metadata.relationship_type }}</td>
<td class="uppercase tracking-wide text-xs">{{ row.relationship_type_label }}</td>
<td>
<button class="nb-btn btn-xs" hx-delete="/knowledge-relationship/{{ relationship.id }}"
hx-target="#relationship_table_section" hx-swap="outerHTML">
@@ -61,7 +62,7 @@
</td>
<td>
<input id="relationship_type_input" name="relationship_type" type="text" placeholder="RelatedTo"
class="nb-input w-full new_relationship_input" />
class="nb-input w-full new_relationship_input" value="{{ default_relationship_type }}" />
</td>
<td>
<button id="save_relationship_button" type="button" class="nb-btn btn-sm" hx-post="/knowledge-relationship"
@@ -80,4 +81,4 @@
document.getElementById('save_relationship_button').click();
}
});
</script>

@@ -40,7 +40,7 @@
{{ scratchpad.content[:100] }}{% if scratchpad.content|length > 100 %}...{% endif %}
</div>
<div class="text-xs text-base-content/50">
Last saved: {{ scratchpad.last_saved_at }}
Last saved: {{ scratchpad.last_saved_at | datetimeformat(format="short", tz=user.timezone) }}
</div>
</div>
{% else %}
@@ -76,9 +76,9 @@
<div class="flex items-start justify-between gap-3">
<div class="flex-1 min-w-0">
<h4 class="font-semibold text-base truncate" title="{{ scratchpad.title }}">{{ scratchpad.title }}</h4>
<div class="text-xs text-base-content/50">Archived {{ scratchpad.archived_at }}</div>
<div class="text-xs text-base-content/50">Archived {{ scratchpad.archived_at | datetimeformat(format="short", tz=user.timezone) }}</div>
{% if scratchpad.ingested_at %}
<div class="text-xs text-base-content/40">Ingestion started {{ scratchpad.ingested_at }}</div>
<div class="text-xs text-base-content/40">Ingestion started {{ scratchpad.ingested_at | datetimeformat(format="short", tz=user.timezone) }}</div>
{% endif %}
</div>
<div class="flex items-center gap-2 flex-shrink-0 flex-wrap justify-end">

@@ -25,7 +25,7 @@

<div class="flex flex-col gap-3">
<div class="text-xs text-base-content/50 flex items-center gap-2">
<span>Last saved: <span id="last-saved">{{ scratchpad.last_saved_at }}</span></span>
<span>Last saved: <span id="last-saved">{{ scratchpad.last_saved_at | datetimeformat(format="short", tz=user.timezone) }}</span></span>
<span id="save-status"
class="inline-flex items-center gap-1 text-success opacity-0 transition-opacity duration-300 pointer-events-none">
{% include "icons/check_icon.html" %} <span class="uppercase tracking-wider text-[0.7em]">Saved</span>

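These hunks implement the timezone-aware scratchpad fix from the changelog: raw timestamps are now piped through `datetimeformat(format="short", tz=user.timezone)`. That filter signature matches the one shipped by minijinja-contrib; a sketch of wiring it up, assuming that is the templating stack (the `add_to_environment` call is the crate's real entry point, the surrounding setup is illustrative):

```rust
// Sketch: enabling the `datetimeformat` filter used in the templates
// above, assuming a minijinja environment. minijinja-contrib's
// datetimeformat accepts `format` (e.g. "short") and `tz` kwargs.
use minijinja::Environment;

fn template_env() -> Environment<'static> {
    let mut env = Environment::new();
    // Registers datetimeformat and related filters; `tz` takes an IANA
    // zone name such as the one stored on the user record.
    minijinja_contrib::add_to_environment(&mut env);
    env
}
```
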
@@ -18,7 +18,8 @@ async-openai = { workspace = true }
surrealdb = { workspace = true }
dom_smoothie = { workspace = true }
tempfile = { workspace = true }
axum_typed_multipart = { workspace = true }
anyhow = { workspace = true }
reqwest = { workspace = true }
chrono = { workspace = true }
text-splitter = { workspace = true }
@@ -28,6 +29,7 @@ headless_chrome = { workspace = true }
base64 = { workspace = true }
pdf-extract = "0.9"
lopdf = "0.32"
bytes = { workspace = true }

common = { path = "../common" }
composite-retrieval = { path = "../composite-retrieval" }

@@ -19,6 +19,7 @@ use common::{
error::AppError,
storage::{
db::SurrealDbClient,
store::StorageManager,
types::{
ingestion_payload::IngestionPayload,
ingestion_task::{IngestionTask, TaskErrorInfo},
@@ -47,12 +48,14 @@ impl IngestionPipeline {
openai_client: Arc<Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
storage: StorageManager,
) -> Result<Self, AppError> {
let services = DefaultPipelineServices::new(
db.clone(),
openai_client.clone(),
config.clone(),
reranker_pool,
storage,
);

Self::with_services(db, IngestionConfig::default(), Arc::new(services))

@@ -2,6 +2,7 @@ use common::{
error::AppError,
storage::{
db::SurrealDbClient,
store::StorageManager,
types::{
ingestion_payload::IngestionPayload,
text_content::{TextContent, UrlInfo},
@@ -19,6 +20,7 @@ pub(crate) async fn to_text_content(
db: &SurrealDbClient,
config: &AppConfig,
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
storage: &StorageManager,
) -> Result<TextContent, AppError> {
match ingestion_payload {
IngestionPayload::Url {
@@ -27,7 +29,7 @@ pub(crate) async fn to_text_content(
category,
user_id,
} => {
let (article, file_info) = extract_text_from_url(&url, db, &user_id, config).await?;
let (article, file_info) = extract_text_from_url(&url, db, &user_id, storage).await?;
Ok(TextContent::new(
article.text_content.into(),
Some(context),
@@ -60,7 +62,8 @@ pub(crate) async fn to_text_content(
category,
user_id,
} => {
let text = extract_text_from_file(&file_info, db, openai_client, config).await?;
let text =
extract_text_from_file(&file_info, db, openai_client, config, storage).await?;
Ok(TextContent::new(
text,
Some(context),

@@ -10,6 +10,7 @@ use common::{
error::AppError,
storage::{
db::SurrealDbClient,
store::StorageManager,
types::{
ingestion_payload::IngestionPayload, knowledge_entity::KnowledgeEntity,
knowledge_relationship::KnowledgeRelationship, system_settings::SystemSettings,
@@ -65,6 +66,7 @@ pub struct DefaultPipelineServices {
openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
storage: StorageManager,
}

impl DefaultPipelineServices {
@@ -73,12 +75,14 @@ impl DefaultPipelineServices {
openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
storage: StorageManager,
) -> Self {
Self {
db,
openai_client,
config,
reranker_pool,
storage,
}
}

@@ -144,7 +148,14 @@ impl PipelineServices for DefaultPipelineServices {
&self,
payload: IngestionPayload,
) -> Result<TextContent, AppError> {
to_text_content(payload, &self.db, &self.config, &self.openai_client).await
to_text_content(
payload,
&self.db,
&self.config,
&self.openai_client,
&self.storage,
)
.await
}

async fn retrieve_similar_entities(

@@ -1,63 +1,200 @@
use anyhow::anyhow;
use common::{
error::AppError,
storage::{db::SurrealDbClient, store, types::file_info::FileInfo},
storage::{db::SurrealDbClient, store::StorageManager, types::file_info::FileInfo},
utils::config::AppConfig,
};
use std::{
env,
io::{Error as IoError, ErrorKind},
path::{Path, PathBuf},
};
use uuid::Uuid;

use super::{
audio_transcription::transcribe_audio_file, image_parsing::extract_text_from_image,
pdf_ingestion::extract_pdf_content,
};

struct TempPathGuard {
path: PathBuf,
}

impl TempPathGuard {
fn as_path(&self) -> &Path {
&self.path
}
}

impl Drop for TempPathGuard {
fn drop(&mut self) {
let _ = std::fs::remove_file(&self.path);
}
}

async fn materialize_temp_file(
bytes: &[u8],
extension: Option<&str>,
) -> Result<TempPathGuard, AppError> {
let mut path = env::temp_dir();
let mut file_name = format!("minne-ingest-{}", Uuid::new_v4());

if let Some(ext) = extension {
if !ext.is_empty() {
file_name.push('.');
file_name.push_str(ext);
}
}

path.push(file_name);

tokio::fs::write(&path, bytes).await?;

Ok(TempPathGuard { path })
}

async fn resolve_existing_local_path(storage: &StorageManager, location: &str) -> Option<PathBuf> {
let path = storage.resolve_local_path(location)?;
match tokio::fs::metadata(&path).await {
Ok(_) => Some(path),
Err(_) => None,
}
}

fn infer_extension(file_info: &FileInfo) -> Option<String> {
Path::new(&file_info.path)
.extension()
.and_then(|ext| ext.to_str())
.map(|ext| ext.to_string())
}

pub async fn extract_text_from_file(
file_info: &FileInfo,
db_client: &SurrealDbClient,
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
config: &AppConfig,
storage: &StorageManager,
) -> Result<String, AppError> {
let base_path = store::resolve_base_dir(config);
let absolute_path = base_path.join(&file_info.path);
let file_bytes = storage
.get(&file_info.path)
.await
.map_err(|e| AppError::from(anyhow!(e)))?;
let local_path = resolve_existing_local_path(storage, &file_info.path).await;

match file_info.mime_type.as_str() {
"text/plain" | "text/markdown" | "application/octet-stream" | "text/x-rust" => {
let content = tokio::fs::read_to_string(&absolute_path).await?;
let content = String::from_utf8(file_bytes.to_vec())
.map_err(|err| AppError::Io(IoError::new(ErrorKind::InvalidData, err)))?;
Ok(content)
}
"application/pdf" => {
extract_pdf_content(
&absolute_path,
if let Some(path) = local_path.as_ref() {
return extract_pdf_content(
path,
db_client,
openai_client,
&config.pdf_ingest_mode,
)
.await;
}

let temp_guard = materialize_temp_file(file_bytes.as_ref(), Some("pdf")).await?;
let result = extract_pdf_content(
temp_guard.as_path(),
db_client,
openai_client,
&config.pdf_ingest_mode,
)
.await
.await;
drop(temp_guard);
result
}
"image/png" | "image/jpeg" => {
let path_str = absolute_path
.to_str()
.ok_or_else(|| {
AppError::Processing(format!(
"Encountered a non-UTF8 path while reading image {}",
file_info.id
))
})?
.to_string();
let content = extract_text_from_image(&path_str, db_client, openai_client).await?;
let content =
extract_text_from_image(file_bytes.as_ref(), db_client, openai_client).await?;
Ok(content)
}
"audio/mpeg" | "audio/mp3" | "audio/wav" | "audio/x-wav" | "audio/webm" | "audio/mp4"
| "audio/ogg" | "audio/flac" => {
let path_str = absolute_path
.to_str()
.ok_or_else(|| {
if let Some(path) = local_path.as_ref() {
let path_str = path.to_str().ok_or_else(|| {
AppError::Processing(format!(
"Encountered a non-UTF8 path while reading audio {}",
file_info.id
))
})?
.to_string();
transcribe_audio_file(&path_str, db_client, openai_client).await
})?;
return transcribe_audio_file(path_str, db_client, openai_client).await;
}

let extension = infer_extension(file_info);
let temp_guard =
materialize_temp_file(file_bytes.as_ref(), extension.as_deref()).await?;
let path_str = temp_guard.as_path().to_str().ok_or_else(|| {
AppError::Processing(format!(
"Encountered a non-UTF8 path while reading audio {}",
file_info.id
))
})?;
let result = transcribe_audio_file(path_str, db_client, openai_client).await;
drop(temp_guard);
result
}
_ => Err(AppError::NotFound(file_info.mime_type.clone())),
}
}

#[cfg(test)]
mod tests {
use super::*;
use async_openai::{config::OpenAIConfig, Client};
use bytes::Bytes;
use chrono::Utc;
use common::{
storage::{db::SurrealDbClient, store::StorageManager},
utils::config::{AppConfig, StorageKind},
};

#[tokio::test]
async fn extracts_text_using_memory_storage_backend() {
let mut config = AppConfig::default();
config.storage = StorageKind::Memory;

let storage = StorageManager::new(&config)
.await
.expect("create storage manager");

let location = "user/test/file.txt";
let contents = b"hello from memory storage";

storage
.put(location, Bytes::from(contents.as_slice().to_vec()))
.await
.expect("write object");

let now = Utc::now();
let file_info = FileInfo {
id: "file".into(),
created_at: now,
updated_at: now,
sha256: "sha256".into(),
path: location.to_string(),
file_name: "file.txt".into(),
mime_type: "text/plain".into(),
user_id: "user".into(),
};

let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("create surreal memory");

let openai_client = Client::with_config(OpenAIConfig::default());

let text = extract_text_from_file(&file_info, &db, &openai_client, &config, &storage)
.await
.expect("extract text");

assert_eq!(text, String::from_utf8_lossy(contents));
}
}

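File processing now reads through the StorageManager rather than joining a base path, so PDF and audio extraction fall back to a guarded temp file whenever no local path exists. For context, StorageManager wraps the object_store crate (per the imports in common/src/storage/store.rs); the following standalone sketch exercises the same put/get round trip as the test above directly against the crate's in-memory backend:

```rust
// Standalone sketch of the object_store primitives behind StorageManager:
// an InMemory backend, written to and read back by logical path.
use bytes::Bytes;
use object_store::{memory::InMemory, path::Path as ObjPath, ObjectStore};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = InMemory::new();
    let location = ObjPath::from("user/test/file.txt");

    // Write the object, then fetch it back as bytes.
    store
        .put(&location, Bytes::from_static(b"hello from memory storage").into())
        .await?;
    let bytes = store.get(&location).await?.bytes().await?;
    assert_eq!(bytes.as_ref(), b"hello from memory storage");
    Ok(())
}
```
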
@@ -10,14 +10,13 @@ use common::{
};

pub async fn extract_text_from_image(
path: &str,
image_bytes: &[u8],
db: &SurrealDbClient,
client: &async_openai::Client<async_openai::config::OpenAIConfig>,
) -> Result<String, AppError> {
let system_settings = SystemSettings::get_current(db).await?;
let image_bytes = tokio::fs::read(&path).await?;

let base64_image = STANDARD.encode(&image_bytes);
let base64_image = STANDARD.encode(image_bytes);

let image_url = format!("data:image/png;base64,{base64_image}");

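Image parsing now accepts raw bytes instead of a filesystem path, and the encode call above produces the inline data URL handed to the vision model. A self-contained sketch of the same construction with the base64 STANDARD engine (the helper name here is illustrative):

```rust
// Minimal, self-contained version of the data-URL construction above,
// using the same base64 STANDARD engine the diff imports.
use base64::{engine::general_purpose::STANDARD, Engine as _};

fn to_data_url(image_bytes: &[u8]) -> String {
    format!("data:image/png;base64,{}", STANDARD.encode(image_bytes))
}

fn main() {
    // Real callers pass the fetched object bytes; this is placeholder data.
    let url = to_data_url(b"\x89PNG\r\n\x1a\n");
    assert!(url.starts_with("data:image/png;base64,"));
}
```
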
@@ -3,8 +3,7 @@ use axum_typed_multipart::{FieldData, FieldMetadata};
use chrono::Utc;
use common::{
error::AppError,
storage::{db::SurrealDbClient, types::file_info::FileInfo},
utils::config::AppConfig,
storage::{db::SurrealDbClient, store::StorageManager, types::file_info::FileInfo},
};
use dom_smoothie::{Article, Readability, TextMode};
use headless_chrome::Browser;
@@ -19,7 +18,7 @@ pub async fn extract_text_from_url(
url: &str,
db: &SurrealDbClient,
user_id: &str,
config: &AppConfig,
storage: &StorageManager,
) -> Result<(Article, FileInfo), AppError> {
info!("Fetching URL: {}", url);
let now = Instant::now();
@@ -81,7 +80,7 @@ pub async fn extract_text_from_url(
metadata,
};

let file_info = FileInfo::new(field_data, db, user_id, config).await?;
let file_info = FileInfo::new_with_storage(field_data, db, user_id, storage).await?;

let config = dom_smoothie::Config {
text_mode: TextMode::Markdown,

@@ -1,6 +1,6 @@
[package]
name = "main"
version = "0.2.6"
version = "0.2.7"
edition = "2021"
repository = "https://github.com/perstarkse/minne"
license = "AGPL-3.0-or-later"

@@ -1,6 +1,8 @@
use api_router::{api_routes_v1, api_state::ApiState};
use axum::{extract::FromRef, Router};
use common::{storage::db::SurrealDbClient, utils::config::get_config};
use common::{
storage::db::SurrealDbClient, storage::store::StorageManager, utils::config::get_config,
};
use composite_retrieval::reranking::RerankerPool;
use html_router::{html_routes, html_state::HtmlState};
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
@@ -46,18 +48,20 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {

let reranker_pool = RerankerPool::maybe_from_config(&config)?;

// Create global storage manager
let storage = StorageManager::new(&config).await?;

let html_state = HtmlState::new_with_resources(
db,
openai_client,
session_store,
storage.clone(),
config.clone(),
reranker_pool.clone(),
)?;
)
.await?;

let api_state = ApiState {
db: html_state.db.clone(),
config: config.clone(),
};
let api_state = ApiState::new(&config, storage.clone()).await?;

// Create Axum router
let app = Router::new()
@@ -115,6 +119,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
openai_client.clone(),
config.clone(),
reranker_pool.clone(),
storage.clone(), // Use the global storage manager
)
.await
.unwrap(),
@@ -147,6 +152,7 @@ struct AppState {
mod tests {
use super::*;
use axum::{body::Body, http::Request, http::StatusCode, Router};
use common::storage::store::StorageManager;
use common::utils::config::{AppConfig, PdfIngestMode, StorageKind};
use std::{path::Path, sync::Arc};
use tower::ServiceExt;
@@ -195,18 +201,25 @@ mod tests {
.with_api_base(&config.openai_base_url),
));

let storage = StorageManager::new(&config)
.await
.expect("failed to build storage manager");

let html_state = HtmlState::new_with_resources(
db.clone(),
openai_client,
session_store,
storage.clone(),
config.clone(),
None,
)
.await
.expect("failed to build html state");

let api_state = ApiState {
db: html_state.db.clone(),
db: db.clone(),
config: config.clone(),
storage,
};

let app = Router::new()

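Both binaries now clone a single StorageManager into HtmlState and ApiState. With axum's FromRef (already imported above), a shared cloneable resource like this can also be extracted directly in handlers; a reduced sketch with stand-in types, not the repo's actual state wiring:

```rust
// Reduced sketch of sharing one cloneable resource across router state
// via axum's FromRef; `Storage` stands in for StorageManager.
use axum::extract::{FromRef, State};
use axum::{routing::get, Router};

#[derive(Clone)]
struct Storage;

#[derive(Clone)]
struct AppState {
    storage: Storage,
}

impl FromRef<AppState> for Storage {
    fn from_ref(state: &AppState) -> Storage {
        state.storage.clone()
    }
}

// Handlers can then take State<Storage> instead of the whole AppState.
async fn health(State(_storage): State<Storage>) -> &'static str {
    "ok"
}

fn app() -> Router {
    Router::new()
        .route("/health", get(health))
        .with_state(AppState { storage: Storage })
}
```
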
@@ -2,7 +2,9 @@ use std::sync::Arc;

use api_router::{api_routes_v1, api_state::ApiState};
use axum::{extract::FromRef, Router};
use common::{storage::db::SurrealDbClient, utils::config::get_config};
use common::{
storage::db::SurrealDbClient, storage::store::StorageManager, utils::config::get_config,
};
use composite_retrieval::reranking::RerankerPool;
use html_router::{html_routes, html_state::HtmlState};
use tracing::info;
@@ -44,18 +46,20 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {

let reranker_pool = RerankerPool::maybe_from_config(&config)?;

// Create global storage manager
let storage = StorageManager::new(&config).await?;

let html_state = HtmlState::new_with_resources(
db,
openai_client,
session_store,
storage.clone(),
config.clone(),
reranker_pool,
)?;
)
.await?;

let api_state = ApiState {
db: html_state.db.clone(),
config: config.clone(),
};
let api_state = ApiState::new(&config, storage).await?;

// Create Axum router
let app = Router::new()

@@ -1,6 +1,8 @@
use std::sync::Arc;

use common::{storage::db::SurrealDbClient, utils::config::get_config};
use common::{
storage::db::SurrealDbClient, storage::store::StorageManager, utils::config::get_config,
};
use composite_retrieval::reranking::RerankerPool;
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
@@ -35,8 +37,18 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {

let reranker_pool = RerankerPool::maybe_from_config(&config)?;

// Create global storage manager
let storage = StorageManager::new(&config).await?;

let ingestion_pipeline = Arc::new(
IngestionPipeline::new(db.clone(), openai_client.clone(), config, reranker_pool).await?,
IngestionPipeline::new(
db.clone(),
openai_client.clone(),
config,
reranker_pool,
storage,
)
.await?,
);

run_worker_loop(db, ingestion_pipeline).await