10 Commits
v0.2.6 ... main

Author SHA1 Message Date
Per Stark  a2c9bb848d  release: 0.2.7  2025-12-04 12:25:46 +01:00
Per Stark  04ee225732  design: improved admin page, new structure  2025-11-04 20:42:24 +01:00
Per Stark  13b7ad6f3a  fix: added cargo lock to crane build  2025-11-04 12:59:32 +01:00
Per Stark  112a6965a4  Merge branch 'main' into development  2025-11-03 12:48:04 +01:00
Per Stark  911e830be5  Merge branch 'development' of github.com:perstarkse/minne into development  2025-11-03 12:40:36 +01:00
Per Stark  3196e65172  fix: improved storage manager, prep for s3  2025-11-03 12:39:15 +01:00
Per Stark  f13791cfcf  fix: better default naming of relationships  2025-10-27 20:46:00 +01:00
Per Stark  75c200b2ba  fix: update graph view when changes in knowledge store  2025-10-27 18:22:15 +01:00
Per Stark  1b7c24747a  fix: in memory object store handler for testing  2025-10-27 17:03:03 +01:00
Per Stark  241ad9a089  fix: scratchpad tz aware datetime  2025-10-27 14:00:22 +01:00
36 changed files with 2124 additions and 761 deletions

View File

@@ -1,8 +1,13 @@
# Changelog
## Unreleased
## Version 0.2.7 (2025-12-04)
- Improved admin page; models are now loaded only when explicitly requested. Groundwork for upcoming configuration features.
- Fix: timezone-aware datetime info in the scratchpad
## Version 0.2.6 (2025-10-29)
- Added an opt-in FastEmbed-based reranking stage behind `reranking_enabled`. It improves retrieval accuracy by re-scoring hybrid results.
- Fix: default name for relationships harmonized across the application
## Version 0.2.5 (2025-10-24)
- Added manual knowledge entity creation flows using a modal, with optional suggested relationships

Cargo.lock generated
View File

@@ -3248,11 +3248,13 @@ dependencies = [
name = "ingestion-pipeline"
version = "0.1.0"
dependencies = [
"anyhow",
"async-openai",
"async-trait",
"axum",
"axum_typed_multipart",
"base64 0.22.1",
"bytes",
"chrono",
"common",
"composite-retrieval",

View File

@@ -1,15 +1,22 @@
use std::sync::Arc;
use common::{storage::db::SurrealDbClient, utils::config::AppConfig};
use common::{
storage::{db::SurrealDbClient, store::StorageManager},
utils::config::AppConfig,
};
#[derive(Clone)]
pub struct ApiState {
pub db: Arc<SurrealDbClient>,
pub config: AppConfig,
pub storage: StorageManager,
}
impl ApiState {
pub async fn new(config: &AppConfig) -> Result<Self, Box<dyn std::error::Error>> {
pub async fn new(
config: &AppConfig,
storage: StorageManager,
) -> Result<Self, Box<dyn std::error::Error>> {
let surreal_db_client = Arc::new(
SurrealDbClient::new(
&config.surrealdb_address,
@@ -26,6 +33,7 @@ impl ApiState {
let app_state = Self {
db: surreal_db_client.clone(),
config: config.clone(),
storage,
};
Ok(app_state)
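// Sketch of the new startup wiring (illustrative only; config loading and
// error handling are elided, and `AppConfig::load` is a hypothetical name):
//
//     let config = AppConfig::load()?;
//     let storage = StorageManager::new(&config).await?;
//     let state = ApiState::new(&config, storage).await?;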

View File

@@ -32,7 +32,8 @@ pub async fn ingest_data(
info!("Received input: {:?}", input);
let file_infos = try_join_all(input.files.into_iter().map(|file| {
FileInfo::new(file, &state.db, &user.id, &state.config).map_err(AppError::from)
FileInfo::new_with_storage(file, &state.db, &user.id, &state.storage)
.map_err(AppError::from)
}))
.await?;

View File

@@ -1,4 +1,5 @@
use std::path::{Path, PathBuf};
use std::io::ErrorKind;
use std::path::{Component, Path, PathBuf};
use std::sync::Arc;
use anyhow::{anyhow, Result as AnyResult};
@@ -6,36 +7,421 @@ use bytes::Bytes;
use futures::stream::BoxStream;
use futures::{StreamExt, TryStreamExt};
use object_store::local::LocalFileSystem;
use object_store::memory::InMemory;
use object_store::{path::Path as ObjPath, ObjectStore};
use crate::utils::config::{AppConfig, StorageKind};
pub type DynStore = Arc<dyn ObjectStore>;
/// Build an object store instance anchored at the given filesystem `prefix`.
/// Storage manager with persistent state and proper lifecycle management.
#[derive(Clone)]
pub struct StorageManager {
store: DynStore,
backend_kind: StorageKind,
local_base: Option<PathBuf>,
}
impl StorageManager {
/// Create a new StorageManager with the specified configuration.
///
/// This method validates the configuration and creates the appropriate
/// storage backend with proper initialization.
pub async fn new(cfg: &AppConfig) -> object_store::Result<Self> {
let backend_kind = cfg.storage.clone();
let (store, local_base) = create_storage_backend(cfg).await?;
Ok(Self {
store,
backend_kind,
local_base,
})
}
/// Create a StorageManager with a custom storage backend.
///
/// This method is useful for testing scenarios where you want to inject
/// a specific storage backend.
pub fn with_backend(store: DynStore, backend_kind: StorageKind) -> Self {
Self {
store,
backend_kind,
local_base: None,
}
}
/// Get the storage backend kind.
pub fn backend_kind(&self) -> &StorageKind {
&self.backend_kind
}
/// Access the resolved local base directory when using the local backend.
pub fn local_base_path(&self) -> Option<&Path> {
self.local_base.as_deref()
}
/// Resolve an object location to a filesystem path when using the local backend.
///
/// Returns `None` when the backend is not local or when the provided location includes
/// unsupported components (absolute paths or parent traversals).
pub fn resolve_local_path(&self, location: &str) -> Option<PathBuf> {
let base = self.local_base_path()?;
let relative = Path::new(location);
if relative.is_absolute()
|| relative
.components()
.any(|component| matches!(component, Component::ParentDir | Component::Prefix(_)))
{
return None;
}
Some(base.join(relative))
}
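// Example (hypothetical values): with a local base of `/var/data`,
// `resolve_local_path("user/uuid/file.txt")` yields
// `Some("/var/data/user/uuid/file.txt")`, while absolute or
// parent-traversing inputs such as `"/etc/passwd"` or `"../secret"`
// yield `None`.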
/// Store bytes at the specified location.
///
/// This operation persists data using the underlying storage backend.
/// For memory backends, data persists for the lifetime of the StorageManager.
pub async fn put(&self, location: &str, data: Bytes) -> object_store::Result<()> {
let path = ObjPath::from(location);
let payload = object_store::PutPayload::from_bytes(data);
self.store.put(&path, payload).await.map(|_| ())
}
/// Retrieve bytes from the specified location.
///
/// Returns the full contents buffered in memory.
pub async fn get(&self, location: &str) -> object_store::Result<Bytes> {
let path = ObjPath::from(location);
let result = self.store.get(&path).await?;
result.bytes().await
}
/// Get a streaming handle for large objects.
///
/// Returns a fallible stream of Bytes chunks suitable for large file processing.
pub async fn get_stream(
&self,
location: &str,
) -> object_store::Result<BoxStream<'static, object_store::Result<Bytes>>> {
let path = ObjPath::from(location);
let result = self.store.get(&path).await?;
Ok(result.into_stream())
}
/// Delete all objects below the specified prefix.
///
/// For local filesystem backends, this also attempts to clean up empty directories.
pub async fn delete_prefix(&self, prefix: &str) -> object_store::Result<()> {
let prefix_path = ObjPath::from(prefix);
let locations = self
.store
.list(Some(&prefix_path))
.map_ok(|m| m.location)
.boxed();
self.store
.delete_stream(locations)
.try_collect::<Vec<_>>()
.await?;
// Cleanup filesystem directories only for local backend
if matches!(self.backend_kind, StorageKind::Local) {
self.cleanup_filesystem_directories(prefix).await?;
}
Ok(())
}
/// List all objects below the specified prefix.
pub async fn list(
&self,
prefix: Option<&str>,
) -> object_store::Result<Vec<object_store::ObjectMeta>> {
let prefix_path = prefix.map(ObjPath::from);
self.store.list(prefix_path.as_ref()).try_collect().await
}
/// Check if an object exists at the specified location.
pub async fn exists(&self, location: &str) -> object_store::Result<bool> {
let path = ObjPath::from(location);
self.store
.head(&path)
.await
.map(|_| true)
.or_else(|e| match e {
object_store::Error::NotFound { .. } => Ok(false),
_ => Err(e),
})
}
/// Cleanup filesystem directories for local backend.
///
/// This is a best-effort cleanup and ignores errors.
async fn cleanup_filesystem_directories(&self, prefix: &str) -> object_store::Result<()> {
if !matches!(self.backend_kind, StorageKind::Local) {
return Ok(());
}
let Some(base) = &self.local_base else {
return Ok(());
};
let relative = Path::new(prefix);
if relative.is_absolute()
|| relative
.components()
.any(|component| matches!(component, Component::ParentDir | Component::Prefix(_)))
{
tracing::warn!(
prefix = %prefix,
"Skipping directory cleanup for unsupported prefix components"
);
return Ok(());
}
let mut current = base.join(relative);
while current.starts_with(base) && current.as_path() != base.as_path() {
match tokio::fs::remove_dir(&current).await {
Ok(_) => {}
Err(err) => match err.kind() {
ErrorKind::NotFound => {}
ErrorKind::DirectoryNotEmpty => break,
_ => tracing::debug!(
error = %err,
path = %current.display(),
"Failed to remove directory during cleanup"
),
},
}
if let Some(parent) = current.parent() {
current = parent.to_path_buf();
} else {
break;
}
}
Ok(())
}
}
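// Usage sketch for the API above (assumes a config with
// `storage: StorageKind::Memory`; error handling elided):
//
//     let storage = StorageManager::new(&cfg).await?;
//     storage.put("user/1/a.txt", Bytes::from_static(b"hello")).await?;
//     let data = storage.get("user/1/a.txt").await?;
//     assert!(storage.exists("user/1/a.txt").await?);
//     storage.delete_prefix("user/1/").await?;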
/// Create a storage backend based on configuration.
///
/// - For the `Local` backend, `prefix` is the absolute directory on disk that
/// serves as the root for all object paths passed to the store.
/// - `prefix` need not exist beforehand; this function creates it if missing.
///
/// Example (Local):
/// - prefix: `/var/data`
/// - object location: `user/uuid/file.txt`
/// - absolute path: `/var/data/user/uuid/file.txt`
pub async fn build_store(prefix: &Path, cfg: &AppConfig) -> object_store::Result<DynStore> {
/// This factory function handles the creation and initialization of different
/// storage backends with proper error handling and validation.
async fn create_storage_backend(
cfg: &AppConfig,
) -> object_store::Result<(DynStore, Option<PathBuf>)> {
match cfg.storage {
StorageKind::Local => {
if !prefix.exists() {
tokio::fs::create_dir_all(prefix).await.map_err(|e| {
let base = resolve_base_dir(cfg);
if !base.exists() {
tokio::fs::create_dir_all(&base).await.map_err(|e| {
object_store::Error::Generic {
store: "LocalFileSystem",
source: e.into(),
}
})?;
}
let store = LocalFileSystem::new_with_prefix(prefix)?;
Ok(Arc::new(store))
let store = LocalFileSystem::new_with_prefix(base.clone())?;
Ok((Arc::new(store), Some(base)))
}
StorageKind::Memory => {
let store = InMemory::new();
Ok((Arc::new(store), None))
}
}
}
/// Testing utilities for storage operations.
///
/// This module provides specialized utilities for testing scenarios with
/// automatic memory backend setup and proper test isolation.
#[cfg(test)]
pub mod testing {
use super::*;
use crate::utils::config::{AppConfig, PdfIngestMode};
use uuid;
/// Create a test configuration with memory storage.
///
/// This provides a ready-to-use configuration for testing scenarios
/// that don't require filesystem persistence.
pub fn test_config_memory() -> AppConfig {
AppConfig {
openai_api_key: "test".into(),
surrealdb_address: "test".into(),
surrealdb_username: "test".into(),
surrealdb_password: "test".into(),
surrealdb_namespace: "test".into(),
surrealdb_database: "test".into(),
data_dir: "/tmp/unused".into(), // Ignored for memory storage
http_port: 0,
openai_base_url: "..".into(),
storage: StorageKind::Memory,
pdf_ingest_mode: PdfIngestMode::LlmFirst,
..Default::default()
}
}
/// Create a test configuration with local storage.
///
/// This provides a ready-to-use configuration for testing scenarios
/// that require actual filesystem operations.
pub fn test_config_local() -> AppConfig {
let base = format!("/tmp/minne_test_storage_{}", uuid::Uuid::new_v4());
AppConfig {
openai_api_key: "test".into(),
surrealdb_address: "test".into(),
surrealdb_username: "test".into(),
surrealdb_password: "test".into(),
surrealdb_namespace: "test".into(),
surrealdb_database: "test".into(),
data_dir: base.into(),
http_port: 0,
openai_base_url: "..".into(),
storage: StorageKind::Local,
pdf_ingest_mode: PdfIngestMode::LlmFirst,
..Default::default()
}
}
/// A specialized StorageManager for testing scenarios.
///
/// This provides automatic setup for memory storage with proper isolation
/// and cleanup capabilities for test environments.
#[derive(Clone)]
pub struct TestStorageManager {
storage: StorageManager,
_temp_dir: Option<(String, std::path::PathBuf)>, // For local storage cleanup
}
impl TestStorageManager {
/// Create a new TestStorageManager with memory backend.
///
/// This is the preferred method for unit tests as it provides
/// fast execution and complete isolation.
pub async fn new_memory() -> object_store::Result<Self> {
let cfg = test_config_memory();
let storage = StorageManager::new(&cfg).await?;
Ok(Self {
storage,
_temp_dir: None,
})
}
/// Create a new TestStorageManager with local filesystem backend.
///
/// This method creates a temporary directory that will be automatically
/// cleaned up when the TestStorageManager is dropped.
pub async fn new_local() -> object_store::Result<Self> {
let cfg = test_config_local();
let storage = StorageManager::new(&cfg).await?;
let resolved = storage
.local_base_path()
.map(|path| (cfg.data_dir.clone(), path.to_path_buf()));
Ok(Self {
storage,
_temp_dir: resolved,
})
}
/// Create a TestStorageManager with custom configuration.
pub async fn with_config(cfg: &AppConfig) -> object_store::Result<Self> {
let storage = StorageManager::new(cfg).await?;
let temp_dir = if matches!(cfg.storage, StorageKind::Local) {
storage
.local_base_path()
.map(|path| (cfg.data_dir.clone(), path.to_path_buf()))
} else {
None
};
Ok(Self {
storage,
_temp_dir: temp_dir,
})
}
/// Get a reference to the underlying StorageManager.
pub fn storage(&self) -> &StorageManager {
&self.storage
}
/// Clone the underlying StorageManager.
pub fn clone_storage(&self) -> StorageManager {
self.storage.clone()
}
/// Store test data at the specified location.
pub async fn put(&self, location: &str, data: &[u8]) -> object_store::Result<()> {
self.storage.put(location, Bytes::from(data.to_vec())).await
}
/// Retrieve test data from the specified location.
pub async fn get(&self, location: &str) -> object_store::Result<Bytes> {
self.storage.get(location).await
}
/// Delete test data below the specified prefix.
pub async fn delete_prefix(&self, prefix: &str) -> object_store::Result<()> {
self.storage.delete_prefix(prefix).await
}
/// Check if test data exists at the specified location.
pub async fn exists(&self, location: &str) -> object_store::Result<bool> {
self.storage.exists(location).await
}
/// List all test objects below the specified prefix.
pub async fn list(
&self,
prefix: Option<&str>,
) -> object_store::Result<Vec<object_store::ObjectMeta>> {
self.storage.list(prefix).await
}
}
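// Test-side sketch: each TestStorageManager is fully isolated, so fixtures
// written in one test are invisible to another (error handling elided):
//
//     let ts = TestStorageManager::new_memory().await?;
//     ts.put("fixtures/sample.txt", b"payload").await?;
//     assert!(ts.exists("fixtures/sample.txt").await?);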
impl Drop for TestStorageManager {
fn drop(&mut self) {
// Clean up temporary directories for local storage
if let Some((_, path)) = &self._temp_dir {
if path.exists() {
let _ = std::fs::remove_dir_all(path);
}
}
}
}
/// Convenience macro for creating memory storage tests.
///
/// This macro simplifies the creation of test storage with memory backend.
#[macro_export]
macro_rules! test_storage_memory {
() => {{
async move {
$crate::storage::store::testing::TestStorageManager::new_memory()
.await
.expect("Failed to create test memory storage")
}
}};
}
/// Convenience macro for creating local storage tests.
///
/// This macro simplifies the creation of test storage with local filesystem backend.
#[macro_export]
macro_rules! test_storage_local {
() => {{
async move {
$crate::storage::store::testing::TestStorageManager::new_local()
.await
.expect("Failed to create test local storage")
}
}};
}
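// Both macros expand to async blocks, so they are awaited at the call
// site (sketch):
//
//     let storage = test_storage_memory!().await;
//     storage.put("demo.txt", b"demo").await.expect("put");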
}
@@ -52,124 +438,6 @@ pub fn resolve_base_dir(cfg: &AppConfig) -> PathBuf {
}
}
/// Build an object store rooted at the configured data directory.
///
/// This is the recommended way to obtain a store for logical object operations
/// such as `put_bytes_at`, `get_bytes_at`, and `delete_prefix_at`.
pub async fn build_store_root(cfg: &AppConfig) -> object_store::Result<DynStore> {
let base = resolve_base_dir(cfg);
build_store(&base, cfg).await
}
/// Write bytes to `file_name` within a filesystem `prefix` using the configured store.
///
/// Prefer [`put_bytes_at`] for location-based writes that do not need to compute
/// a separate filesystem prefix.
pub async fn put_bytes(
prefix: &Path,
file_name: &str,
data: Bytes,
cfg: &AppConfig,
) -> object_store::Result<()> {
let store = build_store(prefix, cfg).await?;
let payload = object_store::PutPayload::from_bytes(data);
store.put(&ObjPath::from(file_name), payload).await?;
Ok(())
}
/// Write bytes to the provided logical object `location`, e.g. `"user/uuid/file"`.
///
/// The store root is taken from `AppConfig::data_dir` for the local backend.
/// This performs an atomic write as guaranteed by `object_store`.
pub async fn put_bytes_at(
location: &str,
data: Bytes,
cfg: &AppConfig,
) -> object_store::Result<()> {
let store = build_store_root(cfg).await?;
let payload = object_store::PutPayload::from_bytes(data);
store.put(&ObjPath::from(location), payload).await?;
Ok(())
}
/// Read bytes from `file_name` within a filesystem `prefix` using the configured store.
///
/// Prefer [`get_bytes_at`] for location-based reads.
pub async fn get_bytes(
prefix: &Path,
file_name: &str,
cfg: &AppConfig,
) -> object_store::Result<Bytes> {
let store = build_store(prefix, cfg).await?;
let r = store.get(&ObjPath::from(file_name)).await?;
let b = r.bytes().await?;
Ok(b)
}
/// Read bytes from the provided logical object `location`.
///
/// Returns the full contents buffered in memory.
pub async fn get_bytes_at(location: &str, cfg: &AppConfig) -> object_store::Result<Bytes> {
let store = build_store_root(cfg).await?;
let r = store.get(&ObjPath::from(location)).await?;
r.bytes().await
}
/// Get a streaming body for the provided logical object `location`.
///
/// Returns a fallible `BoxStream` of `Bytes`, suitable for use with
/// `axum::body::Body::from_stream` to stream responses without buffering.
pub async fn get_stream_at(
location: &str,
cfg: &AppConfig,
) -> object_store::Result<BoxStream<'static, object_store::Result<Bytes>>> {
let store = build_store_root(cfg).await?;
let r = store.get(&ObjPath::from(location)).await?;
Ok(r.into_stream())
}
/// Delete all objects below the provided filesystem `prefix`.
///
/// This is a low-level variant for when a dedicated on-disk prefix is used for a
/// particular object grouping. Prefer [`delete_prefix_at`] for location-based stores.
pub async fn delete_prefix(prefix: &Path, cfg: &AppConfig) -> object_store::Result<()> {
let store = build_store(prefix, cfg).await?;
// list everything and delete
let locations = store.list(None).map_ok(|m| m.location).boxed();
store
.delete_stream(locations)
.try_collect::<Vec<_>>()
.await?;
// Best effort remove the directory itself
if tokio::fs::try_exists(prefix).await.unwrap_or(false) {
let _ = tokio::fs::remove_dir_all(prefix).await;
}
Ok(())
}
/// Delete all objects below the provided logical object `prefix`, e.g. `"user/uuid/"`.
///
/// After deleting, attempts a best-effort cleanup of the now-empty directory on disk
/// when using the local backend.
pub async fn delete_prefix_at(prefix: &str, cfg: &AppConfig) -> object_store::Result<()> {
let store = build_store_root(cfg).await?;
let prefix_path = ObjPath::from(prefix);
let locations = store
.list(Some(&prefix_path))
.map_ok(|m| m.location)
.boxed();
store
.delete_stream(locations)
.try_collect::<Vec<_>>()
.await?;
// Best effort remove empty directory on disk for local storage
let base_dir = resolve_base_dir(cfg).join(prefix);
if tokio::fs::try_exists(&base_dir).await.unwrap_or(false) {
let _ = tokio::fs::remove_dir_all(&base_dir).await;
}
Ok(())
}
/// Split an absolute filesystem path into `(parent_dir, file_name)`.
pub fn split_abs_path(path: &str) -> AnyResult<(PathBuf, String)> {
let pb = PathBuf::from(path);
@@ -198,7 +466,6 @@ mod tests {
use super::*;
use crate::utils::config::{PdfIngestMode::LlmFirst, StorageKind};
use bytes::Bytes;
use futures::TryStreamExt;
use uuid::Uuid;
fn test_config(root: &str) -> AppConfig {
@@ -218,68 +485,353 @@ mod tests {
}
}
fn test_config_memory() -> AppConfig {
AppConfig {
openai_api_key: "test".into(),
surrealdb_address: "test".into(),
surrealdb_username: "test".into(),
surrealdb_password: "test".into(),
surrealdb_namespace: "test".into(),
surrealdb_database: "test".into(),
data_dir: "/tmp/unused".into(), // Ignored for memory storage
http_port: 0,
openai_base_url: "..".into(),
storage: StorageKind::Memory,
pdf_ingest_mode: LlmFirst,
..Default::default()
}
}
#[tokio::test]
async fn test_build_store_root_creates_base() {
let base = format!("/tmp/minne_store_test_{}", Uuid::new_v4());
async fn test_storage_manager_memory_basic_operations() {
let cfg = test_config_memory();
let storage = StorageManager::new(&cfg)
.await
.expect("create storage manager");
assert!(storage.local_base_path().is_none());
let location = "test/data/file.txt";
let data = b"test data for storage manager";
// Test put and get
storage
.put(location, Bytes::from(data.to_vec()))
.await
.expect("put");
let retrieved = storage.get(location).await.expect("get");
assert_eq!(retrieved.as_ref(), data);
// Test exists
assert!(storage.exists(location).await.expect("exists check"));
// Test delete
storage.delete_prefix("test/data/").await.expect("delete");
assert!(!storage
.exists(location)
.await
.expect("exists check after delete"));
}
#[tokio::test]
async fn test_storage_manager_local_basic_operations() {
let base = format!("/tmp/minne_storage_test_{}", Uuid::new_v4());
let cfg = test_config(&base);
let _ = build_store_root(&cfg).await.expect("build store root");
assert!(tokio::fs::try_exists(&base).await.unwrap_or(false));
let storage = StorageManager::new(&cfg)
.await
.expect("create storage manager");
let resolved_base = storage
.local_base_path()
.expect("resolved base dir")
.to_path_buf();
assert_eq!(resolved_base, PathBuf::from(&base));
let location = "test/data/file.txt";
let data = b"test data for local storage";
// Test put and get
storage
.put(location, Bytes::from(data.to_vec()))
.await
.expect("put");
let retrieved = storage.get(location).await.expect("get");
assert_eq!(retrieved.as_ref(), data);
let object_dir = resolved_base.join("test/data");
tokio::fs::metadata(&object_dir)
.await
.expect("object directory exists after write");
// Test exists
assert!(storage.exists(location).await.expect("exists check"));
// Test delete
storage.delete_prefix("test/data/").await.expect("delete");
assert!(!storage
.exists(location)
.await
.expect("exists check after delete"));
assert!(
tokio::fs::metadata(&object_dir).await.is_err(),
"object directory should be removed"
);
tokio::fs::metadata(&resolved_base)
.await
.expect("base directory remains intact");
// Clean up
let _ = tokio::fs::remove_dir_all(&base).await;
}
#[tokio::test]
async fn test_put_get_bytes_at_and_delete_prefix_at() {
let base = format!("/tmp/minne_store_test_{}", Uuid::new_v4());
let cfg = test_config(&base);
let location_prefix = format!("{}/{}", "user1", Uuid::new_v4());
let file_name = "file.txt";
let location = format!("{}/{}", &location_prefix, file_name);
let payload = Bytes::from_static(b"hello world");
put_bytes_at(&location, payload.clone(), &cfg)
async fn test_storage_manager_memory_persistence() {
let cfg = test_config_memory();
let storage = StorageManager::new(&cfg)
.await
.expect("put");
let got = get_bytes_at(&location, &cfg).await.expect("get");
assert_eq!(got.as_ref(), payload.as_ref());
.expect("create storage manager");
// Delete the whole prefix and ensure retrieval fails
delete_prefix_at(&location_prefix, &cfg)
let location = "persistence/test.txt";
let data1 = b"first data";
let data2 = b"second data";
// Put first data
storage
.put(location, Bytes::from(data1.to_vec()))
.await
.expect("delete prefix");
assert!(get_bytes_at(&location, &cfg).await.is_err());
.expect("put first");
let _ = tokio::fs::remove_dir_all(&base).await;
// Retrieve and verify first data
let retrieved1 = storage.get(location).await.expect("get first");
assert_eq!(retrieved1.as_ref(), data1);
// Overwrite with second data
storage
.put(location, Bytes::from(data2.to_vec()))
.await
.expect("put second");
// Retrieve and verify second data
let retrieved2 = storage.get(location).await.expect("get second");
assert_eq!(retrieved2.as_ref(), data2);
// Data persists across multiple operations using the same StorageManager
assert_ne!(retrieved1.as_ref(), retrieved2.as_ref());
}
#[tokio::test]
async fn test_get_stream_at() {
let base = format!("/tmp/minne_store_test_{}", Uuid::new_v4());
let cfg = test_config(&base);
async fn test_storage_manager_list_operations() {
let cfg = test_config_memory();
let storage = StorageManager::new(&cfg)
.await
.expect("create storage manager");
let location = format!("{}/{}/stream.bin", "user2", Uuid::new_v4());
let content = vec![7u8; 32 * 1024]; // 32KB payload
// Create multiple files
let files = vec![
("dir1/file1.txt", b"content1"),
("dir1/file2.txt", b"content2"),
("dir2/file3.txt", b"content3"),
];
put_bytes_at(&location, Bytes::from(content.clone()), &cfg)
for (location, data) in &files {
storage
.put(location, Bytes::from(data.to_vec()))
.await
.expect("put");
}
// Test listing without prefix
let all_files = storage.list(None).await.expect("list all");
assert_eq!(all_files.len(), 3);
// Test listing with prefix
let dir1_files = storage.list(Some("dir1/")).await.expect("list dir1");
assert_eq!(dir1_files.len(), 2);
assert!(dir1_files
.iter()
.any(|meta| meta.location.as_ref().contains("file1.txt")));
assert!(dir1_files
.iter()
.any(|meta| meta.location.as_ref().contains("file2.txt")));
// Test listing non-existent prefix
let empty_files = storage
.list(Some("nonexistent/"))
.await
.expect("list nonexistent");
assert_eq!(empty_files.len(), 0);
}
#[tokio::test]
async fn test_storage_manager_stream_operations() {
let cfg = test_config_memory();
let storage = StorageManager::new(&cfg)
.await
.expect("create storage manager");
let location = "stream/test.bin";
let content = vec![42u8; 1024 * 64]; // 64KB of data
// Put large data
storage
.put(location, Bytes::from(content.clone()))
.await
.expect("put large data");
// Get as stream
let mut stream = storage.get_stream(location).await.expect("get stream");
let mut collected = Vec::new();
while let Some(chunk) = stream.next().await {
let chunk = chunk.expect("stream chunk");
collected.extend_from_slice(&chunk);
}
assert_eq!(collected, content);
}
#[tokio::test]
async fn test_storage_manager_with_custom_backend() {
use object_store::memory::InMemory;
// Create custom memory backend
let custom_store = InMemory::new();
let storage = StorageManager::with_backend(Arc::new(custom_store), StorageKind::Memory);
let location = "custom/test.txt";
let data = b"custom backend test";
// Test operations with custom backend
storage
.put(location, Bytes::from(data.to_vec()))
.await
.expect("put");
let retrieved = storage.get(location).await.expect("get");
assert_eq!(retrieved.as_ref(), data);
let stream = get_stream_at(&location, &cfg).await.expect("stream");
let combined: Vec<u8> = stream
.map_ok(|chunk| chunk.to_vec())
.try_fold(Vec::new(), |mut acc, mut chunk| async move {
acc.append(&mut chunk);
Ok(acc)
})
assert!(storage.exists(location).await.expect("exists"));
assert_eq!(*storage.backend_kind(), StorageKind::Memory);
}
#[tokio::test]
async fn test_storage_manager_error_handling() {
let cfg = test_config_memory();
let storage = StorageManager::new(&cfg)
.await
.expect("collect");
.expect("create storage manager");
assert_eq!(combined, content);
// Test getting non-existent file
let result = storage.get("nonexistent.txt").await;
assert!(result.is_err());
delete_prefix_at(&split_object_path(&location).unwrap().0, &cfg)
// Test checking existence of non-existent file
let exists = storage
.exists("nonexistent.txt")
.await
.ok();
.expect("exists check");
assert!(!exists);
let _ = tokio::fs::remove_dir_all(&base).await;
// Test getting an invalid (empty) location (should not panic)
let _result = storage.get("").await;
// This may or may not error depending on the backend implementation
// The important thing is that it doesn't panic
}
// TestStorageManager tests
#[tokio::test]
async fn test_test_storage_manager_memory() {
let test_storage = testing::TestStorageManager::new_memory()
.await
.expect("create test storage");
let location = "test/storage/file.txt";
let data = b"test data with TestStorageManager";
// Test put and get
test_storage.put(location, data).await.expect("put");
let retrieved = test_storage.get(location).await.expect("get");
assert_eq!(retrieved.as_ref(), data);
// Test existence check
assert!(test_storage.exists(location).await.expect("exists"));
// Test list
let files = test_storage
.list(Some("test/storage/"))
.await
.expect("list");
assert_eq!(files.len(), 1);
// Test delete
test_storage
.delete_prefix("test/storage/")
.await
.expect("delete");
assert!(!test_storage
.exists(location)
.await
.expect("exists after delete"));
}
#[tokio::test]
async fn test_test_storage_manager_local() {
let test_storage = testing::TestStorageManager::new_local()
.await
.expect("create test storage");
let location = "test/local/file.txt";
let data = b"test data with local TestStorageManager";
// Test put and get
test_storage.put(location, data).await.expect("put");
let retrieved = test_storage.get(location).await.expect("get");
assert_eq!(retrieved.as_ref(), data);
// Test existence check
assert!(test_storage.exists(location).await.expect("exists"));
// The storage should be automatically cleaned up when test_storage is dropped
}
#[tokio::test]
async fn test_test_storage_manager_isolation() {
let storage1 = testing::TestStorageManager::new_memory()
.await
.expect("create test storage 1");
let storage2 = testing::TestStorageManager::new_memory()
.await
.expect("create test storage 2");
let location = "isolation/test.txt";
let data1 = b"storage 1 data";
let data2 = b"storage 2 data";
// Put different data in each storage
storage1.put(location, data1).await.expect("put storage 1");
storage2.put(location, data2).await.expect("put storage 2");
// Verify isolation
let retrieved1 = storage1.get(location).await.expect("get storage 1");
let retrieved2 = storage2.get(location).await.expect("get storage 2");
assert_eq!(retrieved1.as_ref(), data1);
assert_eq!(retrieved2.as_ref(), data2);
assert_ne!(retrieved1.as_ref(), retrieved2.as_ref());
}
#[tokio::test]
async fn test_test_storage_manager_config() {
let cfg = testing::test_config_memory();
let test_storage = testing::TestStorageManager::with_config(&cfg)
.await
.expect("create test storage with config");
let location = "config/test.txt";
let data = b"test data with custom config";
test_storage.put(location, data).await.expect("put");
let retrieved = test_storage.get(location).await.expect("get");
assert_eq!(retrieved.as_ref(), data);
// Verify it's using memory backend
assert_eq!(*test_storage.storage().backend_kind(), StorageKind::Memory);
}
}

View File

@@ -1,4 +1,5 @@
use axum_typed_multipart::FieldData;
use bytes;
use mime_guess::from_path;
use object_store::Error as ObjectStoreError;
use sha2::{Digest, Sha256};
@@ -13,9 +14,8 @@ use uuid::Uuid;
use crate::{
error::AppError,
storage::{db::SurrealDbClient, store},
storage::{db::SurrealDbClient, store, store::StorageManager},
stored_object,
utils::config::AppConfig,
};
#[derive(Error, Debug)]
@@ -51,54 +51,6 @@ stored_object!(FileInfo, "file", {
});
impl FileInfo {
pub async fn new(
field_data: FieldData<NamedTempFile>,
db_client: &SurrealDbClient,
user_id: &str,
config: &AppConfig,
) -> Result<Self, FileError> {
let file = field_data.contents;
let file_name = field_data
.metadata
.file_name
.ok_or(FileError::MissingFileName)?;
// Calculate SHA256
let sha256 = Self::get_sha(&file).await?;
// Early return if file already exists
match Self::get_by_sha(&sha256, db_client).await {
Ok(existing_file) => {
info!("File already exists with SHA256: {}", sha256);
return Ok(existing_file);
}
Err(FileError::FileNotFound(_)) => (), // Expected case for new files
Err(e) => return Err(e), // Propagate unexpected errors
}
// Generate UUID and prepare paths
let uuid = Uuid::new_v4();
let sanitized_file_name = Self::sanitize_file_name(&file_name);
let now = Utc::now();
// Create new FileInfo instance
let file_info = Self {
id: uuid.to_string(),
created_at: now,
updated_at: now,
file_name,
sha256,
path: Self::persist_file(&uuid, file, &sanitized_file_name, user_id, config).await?,
mime_type: Self::guess_mime_type(Path::new(&sanitized_file_name)),
user_id: user_id.to_string(),
};
// Store in database
db_client.store_item(file_info.clone()).await?;
Ok(file_info)
}
/// Guesses the MIME type based on the file extension.
///
/// # Arguments
@@ -167,36 +119,6 @@ impl FileInfo {
}
}
/// Persists the file under the logical location `{user_id}/{uuid}/{file_name}`.
///
/// # Arguments
/// * `uuid` - The UUID of the file.
/// * `file` - The temporary file to persist.
/// * `file_name` - The sanitized file name.
/// * `user_id` - The user ID
/// * `config` - Application configuration containing data directory path
///
/// # Returns
/// * `Result<String, FileError>` - The logical object location or an error.
async fn persist_file(
uuid: &Uuid,
file: NamedTempFile,
file_name: &str,
user_id: &str,
config: &AppConfig,
) -> Result<String, FileError> {
// Logical object location relative to the store root
let location = format!("{}/{}/{}", user_id, uuid, file_name);
info!("Persisting to object location: {}", location);
let bytes = tokio::fs::read(file.path()).await?;
store::put_bytes_at(&location, bytes.into(), config)
.await
.map_err(FileError::from)?;
Ok(location)
}
/// Retrieves a `FileInfo` by SHA256.
///
/// # Arguments
@@ -215,41 +137,6 @@ impl FileInfo {
.ok_or(FileError::FileNotFound(sha256.to_string()))
}
/// Removes FileInfo from database and file from disk
///
/// # Arguments
/// * `id` - Id of the FileInfo
/// * `db_client` - Reference to SurrealDbClient
///
/// # Returns
/// `Result<(), FileError>`
pub async fn delete_by_id(
id: &str,
db_client: &SurrealDbClient,
config: &AppConfig,
) -> Result<(), AppError> {
// Get the FileInfo from the database
let Some(file_info) = db_client.get_item::<FileInfo>(id).await? else {
return Ok(());
};
// Remove the object's parent prefix in the object store
let (parent_prefix, _file_name) = store::split_object_path(&file_info.path)
.map_err(|e| AppError::from(anyhow::anyhow!(e)))?;
store::delete_prefix_at(&parent_prefix, config)
.await
.map_err(|e| AppError::from(anyhow::anyhow!(e)))?;
info!(
"Removed object prefix {} and its contents via object_store",
parent_prefix
);
// Delete the FileInfo from the database
db_client.delete_item::<FileInfo>(id).await?;
Ok(())
}
/// Retrieves a `FileInfo` by its ID.
///
/// # Arguments
@@ -265,34 +152,168 @@ impl FileInfo {
Err(e) => Err(FileError::SurrealError(e)),
}
}
/// Create a new FileInfo using StorageManager for persistent storage operations.
///
/// # Arguments
/// * `field_data` - The uploaded file data
/// * `db_client` - Reference to the SurrealDbClient
/// * `user_id` - The user ID
/// * `storage` - A StorageManager instance for storage operations
///
/// # Returns
/// * `Result<Self, FileError>` - The created FileInfo or an error
pub async fn new_with_storage(
field_data: FieldData<NamedTempFile>,
db_client: &SurrealDbClient,
user_id: &str,
storage: &StorageManager,
) -> Result<Self, FileError> {
let file = field_data.contents;
let file_name = field_data
.metadata
.file_name
.ok_or(FileError::MissingFileName)?;
let original_file_name = file_name.clone();
// Calculate SHA256
let sha256 = Self::get_sha(&file).await?;
// Early return if file already exists
match Self::get_by_sha(&sha256, db_client).await {
Ok(existing_file) => {
info!("File already exists with SHA256: {}", sha256);
return Ok(existing_file);
}
Err(FileError::FileNotFound(_)) => (), // Expected case for new files
Err(e) => return Err(e), // Propagate unexpected errors
}
// Generate UUID and prepare paths
let uuid = Uuid::new_v4();
let sanitized_file_name = Self::sanitize_file_name(&file_name);
let now = Utc::now();
let path =
Self::persist_file_with_storage(&uuid, file, &sanitized_file_name, user_id, storage)
.await?;
// Create FileInfo struct
let file_info = FileInfo {
id: uuid.to_string(),
user_id: user_id.to_string(),
sha256,
file_name: original_file_name,
path,
mime_type: Self::guess_mime_type(Path::new(&file_name)),
created_at: now,
updated_at: now,
};
// Store in database
db_client
.store_item(file_info.clone())
.await
.map_err(FileError::SurrealError)?;
Ok(file_info)
}
/// Delete a FileInfo by ID using StorageManager for storage operations.
///
/// # Arguments
/// * `id` - ID of the FileInfo
/// * `db_client` - Reference to SurrealDbClient
/// * `storage` - A StorageManager instance for storage operations
///
/// # Returns
/// * `Result<(), AppError>` - Success or error
pub async fn delete_by_id_with_storage(
id: &str,
db_client: &SurrealDbClient,
storage: &StorageManager,
) -> Result<(), AppError> {
// Get the FileInfo from the database
let Some(file_info) = db_client.get_item::<FileInfo>(id).await? else {
return Ok(());
};
// Remove the object's parent prefix in the object store
let (parent_prefix, _file_name) = store::split_object_path(&file_info.path)
.map_err(|e| AppError::from(anyhow::anyhow!(e)))?;
storage
.delete_prefix(&parent_prefix)
.await
.map_err(|e| AppError::from(anyhow::anyhow!(e)))?;
info!(
"Removed object prefix {} and its contents via StorageManager",
parent_prefix
);
// Delete the FileInfo from the database
db_client.delete_item::<FileInfo>(id).await?;
Ok(())
}
/// Retrieve file content using StorageManager for storage operations.
///
/// # Arguments
/// * `storage` - A StorageManager instance for storage operations
///
/// # Returns
/// * `Result<bytes::Bytes, AppError>` - The file content or an error
pub async fn get_content_with_storage(
&self,
storage: &StorageManager,
) -> Result<bytes::Bytes, AppError> {
storage
.get(&self.path)
.await
.map_err(|e: object_store::Error| AppError::from(anyhow::anyhow!(e)))
}
/// Persist file to storage using StorageManager.
///
/// # Arguments
/// * `uuid` - The UUID for the file
/// * `file` - The temporary file to persist
/// * `file_name` - The name of the file
/// * `user_id` - The user ID
/// * `storage` - A StorageManager instance for storage operations
///
/// # Returns
/// * `Result<String, FileError>` - The logical object location or an error.
async fn persist_file_with_storage(
uuid: &Uuid,
file: NamedTempFile,
file_name: &str,
user_id: &str,
storage: &StorageManager,
) -> Result<String, FileError> {
// Logical object location relative to the store root
let location = format!("{}/{}/{}", user_id, uuid, file_name);
info!("Persisting to object location: {}", location);
let bytes = tokio::fs::read(file.path()).await?;
storage
.put(&location, bytes.into())
.await
.map_err(FileError::from)?;
Ok(location)
}
}
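// End-to-end sketch of the storage-backed lifecycle (error handling
// elided; `field_data` is an uploaded file as in the ingest handler):
//
//     let storage = StorageManager::new(&config).await?;
//     let info = FileInfo::new_with_storage(field_data, &db, user_id, &storage).await?;
//     let bytes = info.get_content_with_storage(&storage).await?;
//     FileInfo::delete_by_id_with_storage(&info.id, &db, &storage).await?;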
#[cfg(test)]
mod tests {
use super::*;
use crate::utils::config::{AppConfig, PdfIngestMode::LlmFirst, StorageKind};
use crate::storage::store::testing::TestStorageManager;
use axum::http::HeaderMap;
use axum_typed_multipart::FieldMetadata;
use std::io::Write;
use std::{io::Write, path::Path};
use tempfile::NamedTempFile;
fn test_config(data_dir: &str) -> AppConfig {
AppConfig {
data_dir: data_dir.to_string(),
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
..Default::default()
}
}
/// Creates a test temporary file with the given content
fn create_test_file(content: &[u8], file_name: &str) -> FieldData<NamedTempFile> {
let mut temp_file = NamedTempFile::new().expect("Failed to create temp file");
@@ -316,33 +337,39 @@ mod tests {
}
#[tokio::test]
async fn test_fileinfo_create_read_delete() {
// Setup in-memory database for testing
async fn test_fileinfo_create_read_delete_with_storage_manager() {
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("Failed to start in-memory surrealdb");
db.apply_migrations().await.unwrap();
// Create a test file
let content = b"This is a test file for cross-filesystem operations";
let file_name = "cross_fs_test.txt";
let content = b"This is a test file for StorageManager operations";
let file_name = "storage_manager_test.txt";
let field_data = create_test_file(content, file_name);
// Create a FileInfo instance with data_dir in /tmp
// Create test storage manager (memory backend)
let test_storage = store::testing::TestStorageManager::new_memory()
.await
.expect("Failed to create test storage manager");
// Create a FileInfo instance with storage manager
let user_id = "test_user";
let config = test_config("/tmp/minne_test_data");
// Test file creation
let file_info = FileInfo::new(field_data, &db, user_id, &config)
.await
.expect("Failed to create file across filesystems");
// Test file creation with StorageManager
let file_info =
FileInfo::new_with_storage(field_data, &db, user_id, test_storage.storage())
.await
.expect("Failed to create file with StorageManager");
assert_eq!(file_info.file_name, file_name);
// Verify the file exists via object_store and has correct content
let bytes = store::get_bytes_at(&file_info.path, &config)
// Verify the file exists via StorageManager and has correct content
let bytes = file_info
.get_content_with_storage(test_storage.storage())
.await
.expect("Failed to read file content via object_store");
assert_eq!(bytes, content.as_slice());
.expect("Failed to read file content via StorageManager");
assert_eq!(bytes.as_ref(), content);
// Test file reading
let retrieved = FileInfo::get_by_id(&file_info.id, &db)
@@ -350,51 +377,89 @@ mod tests {
.expect("Failed to retrieve file info");
assert_eq!(retrieved.id, file_info.id);
assert_eq!(retrieved.sha256, file_info.sha256);
assert_eq!(retrieved.file_name, file_name);
// Test file deletion
FileInfo::delete_by_id(&file_info.id, &db, &config)
// Test file deletion with StorageManager
FileInfo::delete_by_id_with_storage(&file_info.id, &db, test_storage.storage())
.await
.expect("Failed to delete file");
assert!(
store::get_bytes_at(&file_info.path, &config).await.is_err(),
"File should be deleted"
);
.expect("Failed to delete file with StorageManager");
// Clean up the test directory
let _ = tokio::fs::remove_dir_all(&config.data_dir).await;
let deleted_result = file_info
.get_content_with_storage(test_storage.storage())
.await;
assert!(deleted_result.is_err(), "File should be deleted");
// No cleanup needed - TestStorageManager handles it automatically
}
#[tokio::test]
async fn test_fileinfo_duplicate_detection() {
// Setup in-memory database for testing
async fn test_fileinfo_preserves_original_filename_and_sanitizes_path() {
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("Failed to start in-memory surrealdb");
db.apply_migrations().await.unwrap();
// Create a test file
let content = b"This is a test file for cross-filesystem duplicate detection";
let file_name = "cross_fs_duplicate.txt";
let content = b"filename sanitization";
let original_name = "Complex name (1).txt";
let expected_sanitized = "Complex_name__1_.txt";
let field_data = create_test_file(content, original_name);
let test_storage = store::testing::TestStorageManager::new_memory()
.await
.expect("Failed to create test storage manager");
let file_info =
FileInfo::new_with_storage(field_data, &db, "sanitized_user", test_storage.storage())
.await
.expect("Failed to create file via storage manager");
assert_eq!(file_info.file_name, original_name);
let stored_name = Path::new(&file_info.path)
.file_name()
.and_then(|name| name.to_str())
.expect("stored name");
assert_eq!(stored_name, expected_sanitized);
}
#[tokio::test]
async fn test_fileinfo_duplicate_detection_with_storage_manager() {
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("Failed to start in-memory surrealdb");
db.apply_migrations().await.unwrap();
let content = b"This is a test file for StorageManager duplicate detection";
let file_name = "storage_manager_duplicate.txt";
let field_data = create_test_file(content, file_name);
// Create a FileInfo instance with data_dir in /tmp
// Create test storage manager
let test_storage = store::testing::TestStorageManager::new_memory()
.await
.expect("Failed to create test storage manager");
// Create a FileInfo instance with storage manager
let user_id = "test_user";
let config = test_config("/tmp/minne_test_data");
// Store the original file
let original_file_info = FileInfo::new(field_data, &db, user_id, &config)
.await
.expect("Failed to create original file");
let original_file_info =
FileInfo::new_with_storage(field_data, &db, user_id, test_storage.storage())
.await
.expect("Failed to create original file with StorageManager");
// Create another file with the same content but different name
let duplicate_name = "cross_fs_duplicate_2.txt";
let duplicate_name = "storage_manager_duplicate_2.txt";
let field_data2 = create_test_file(content, duplicate_name);
// The system should detect it's the same file and return the original FileInfo
let duplicate_file_info = FileInfo::new(field_data2, &db, user_id, &config)
.await
.expect("Failed to process duplicate file");
let duplicate_file_info =
FileInfo::new_with_storage(field_data2, &db, user_id, test_storage.storage())
.await
.expect("Failed to process duplicate file with StorageManager");
// Verify duplicate detection worked
assert_eq!(duplicate_file_info.id, original_file_info.id);
@@ -402,34 +467,48 @@ mod tests {
assert_eq!(duplicate_file_info.file_name, file_name);
assert_ne!(duplicate_file_info.file_name, duplicate_name);
// Clean up
FileInfo::delete_by_id(&original_file_info.id, &db, &config)
// Verify both files have the same content (they should point to the same file)
let original_content = original_file_info
.get_content_with_storage(test_storage.storage())
.await
.expect("Failed to delete file");
let _ = tokio::fs::remove_dir_all(&config.data_dir).await;
.unwrap();
let duplicate_content = duplicate_file_info
.get_content_with_storage(test_storage.storage())
.await
.unwrap();
assert_eq!(original_content.as_ref(), content);
assert_eq!(duplicate_content.as_ref(), content);
// Clean up
FileInfo::delete_by_id_with_storage(&original_file_info.id, &db, test_storage.storage())
.await
.expect("Failed to delete original file with StorageManager");
}
#[tokio::test]
async fn test_file_creation() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("Failed to start in-memory surrealdb");
db.apply_migrations()
.await
.expect("Failed to apply migrations");
// Create a test file
let content = b"This is a test file content";
let file_name = "test_file.txt";
let field_data = create_test_file(content, file_name);
// Create a FileInfo instance
// Create a FileInfo instance with StorageManager
let user_id = "test_user";
let config = test_config("./data");
let file_info = FileInfo::new(field_data, &db, user_id, &config).await;
let test_storage = TestStorageManager::new_memory()
.await
.expect("create test storage manager");
let file_info =
FileInfo::new_with_storage(field_data, &db, user_id, test_storage.storage()).await;
// We can't fully test persistence to disk in unit tests,
// but we can verify the database record was created
// Verify the FileInfo was created successfully
assert!(file_info.is_ok());
let file_info = file_info.unwrap();
@@ -459,33 +538,39 @@ mod tests {
#[tokio::test]
async fn test_file_duplicate_detection() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("Failed to start in-memory surrealdb");
db.apply_migrations()
.await
.expect("Failed to apply migrations");
// First, store a file with known content
let content = b"This is a test file for duplicate detection";
let file_name = "original.txt";
let user_id = "test_user";
let config = test_config("./data");
let test_storage = TestStorageManager::new_memory()
.await
.expect("create test storage manager");
let field_data1 = create_test_file(content, file_name);
let original_file_info = FileInfo::new(field_data1, &db, user_id, &config)
.await
.expect("Failed to create original file");
let original_file_info =
FileInfo::new_with_storage(field_data1, &db, user_id, test_storage.storage())
.await
.expect("Failed to create original file");
// Now try to store another file with the same content but different name
let duplicate_name = "duplicate.txt";
let field_data2 = create_test_file(content, duplicate_name);
// The system should detect it's the same file and return the original FileInfo
let duplicate_file_info = FileInfo::new(field_data2, &db, user_id, &config)
.await
.expect("Failed to process duplicate file");
let duplicate_file_info =
FileInfo::new_with_storage(field_data2, &db, user_id, test_storage.storage())
.await
.expect("Failed to process duplicate file");
// The returned FileInfo should match the original
assert_eq!(duplicate_file_info.id, original_file_info.id);
@@ -553,7 +638,6 @@ mod tests {
#[tokio::test]
async fn test_get_by_sha_not_found() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
@@ -574,7 +658,6 @@ mod tests {
#[tokio::test]
async fn test_manual_file_info_creation() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
@@ -615,23 +698,28 @@ mod tests {
#[tokio::test]
async fn test_delete_by_id() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("Failed to start in-memory surrealdb");
db.apply_migrations()
.await
.expect("Failed to apply migrations");
// Create and persist a test file via FileInfo::new
// Create and persist a test file via FileInfo::new_with_storage
let user_id = "user123";
let cfg = test_config("./data");
let test_storage = TestStorageManager::new_memory()
.await
.expect("create test storage manager");
let temp = create_test_file(b"test content", "test_file.txt");
let file_info = FileInfo::new(temp, &db, user_id, &cfg)
let file_info = FileInfo::new_with_storage(temp, &db, user_id, test_storage.storage())
.await
.expect("create file");
// Delete the file
let delete_result = FileInfo::delete_by_id(&file_info.id, &db, &cfg).await;
// Delete the file using StorageManager
let delete_result =
FileInfo::delete_by_id_with_storage(&file_info.id, &db, test_storage.storage()).await;
// Delete should be successful
assert!(
@@ -650,13 +738,12 @@ mod tests {
"FileInfo should be deleted from the database"
);
// Verify content no longer retrievable
assert!(store::get_bytes_at(&file_info.path, &cfg).await.is_err());
// Verify content no longer retrievable from storage
assert!(test_storage.storage().get(&file_info.path).await.is_err());
}
#[tokio::test]
async fn test_delete_by_id_not_found() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
@@ -664,19 +751,16 @@ mod tests {
.expect("Failed to start in-memory surrealdb");
// Try to delete a file that doesn't exist
let result = FileInfo::delete_by_id(
"nonexistent_id",
&db,
&test_config("./data"),
)
.await;
let test_storage = TestStorageManager::new_memory().await.unwrap();
let result =
FileInfo::delete_by_id_with_storage("nonexistent_id", &db, test_storage.storage())
.await;
// Should succeed even if the file record does not exist
assert!(result.is_ok());
}
#[tokio::test]
async fn test_get_by_id() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
@@ -717,7 +801,6 @@ mod tests {
#[tokio::test]
async fn test_get_by_id_not_found() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
@@ -741,43 +824,197 @@ mod tests {
}
}
// StorageManager-based tests
#[tokio::test]
async fn test_fileinfo_persist_with_custom_root() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
async fn test_file_info_new_with_storage_memory() {
// Setup
let db = SurrealDbClient::memory("test_ns", "test_file_storage_memory")
.await
.expect("Failed to start in-memory surrealdb");
.unwrap();
db.apply_migrations().await.unwrap();
// Create a test file
let content = b"This is a test file for data directory configuration";
let file_name = "data_dir_test.txt";
let field_data = create_test_file(content, file_name);
// Create a FileInfo instance with a custom data directory
let content = b"This is a test file for StorageManager";
let field_data = create_test_file(content, "test_storage.txt");
let user_id = "test_user";
let custom_data_dir = "/tmp/minne_custom_data_dir";
let config = test_config(custom_data_dir);
// Test file creation
let file_info = FileInfo::new(field_data, &db, user_id, &config)
// Create test storage manager
let storage = store::testing::TestStorageManager::new_memory()
.await
.expect("Failed to create file in custom data directory");
.unwrap();
// Verify the file has the correct content via object_store
let file_content = store::get_bytes_at(&file_info.path, &config)
// Test file creation with StorageManager
let file_info = FileInfo::new_with_storage(field_data, &db, user_id, storage.storage())
.await
.expect("Failed to read file content");
assert_eq!(file_content.as_ref(), content);
.expect("Failed to create file with StorageManager");
// Test file deletion
FileInfo::delete_by_id(&file_info.id, &db, &config)
// Verify the file was created correctly
assert_eq!(file_info.user_id, user_id);
assert_eq!(file_info.file_name, "test_storage.txt");
assert!(!file_info.sha256.is_empty());
assert!(!file_info.path.is_empty());
// Test content retrieval with StorageManager
let retrieved_content = file_info
.get_content_with_storage(storage.storage())
.await
.expect("Failed to delete file");
assert!(store::get_bytes_at(&file_info.path, &config).await.is_err());
.expect("Failed to get file content with StorageManager");
assert_eq!(retrieved_content.as_ref(), content);
// Clean up the test directory
let _ = tokio::fs::remove_dir_all(custom_data_dir).await;
// Test file deletion with StorageManager
FileInfo::delete_by_id_with_storage(&file_info.id, &db, storage.storage())
.await
.expect("Failed to delete file with StorageManager");
// Verify file is deleted
let deleted_content_result = file_info.get_content_with_storage(storage.storage()).await;
assert!(deleted_content_result.is_err());
}
#[tokio::test]
async fn test_file_info_new_with_storage_local() {
// Setup
let db = SurrealDbClient::memory("test_ns", "test_file_storage_local")
.await
.unwrap();
db.apply_migrations().await.unwrap();
let content = b"This is a test file for StorageManager with local storage";
let field_data = create_test_file(content, "test_local.txt");
let user_id = "test_user";
// Create test storage manager with local backend
let storage = store::testing::TestStorageManager::new_local()
.await
.unwrap();
// Test file creation with StorageManager
let file_info = FileInfo::new_with_storage(field_data, &db, user_id, storage.storage())
.await
.expect("Failed to create file with StorageManager");
// Verify the file was created correctly
assert_eq!(file_info.user_id, user_id);
assert_eq!(file_info.file_name, "test_local.txt");
assert!(!file_info.sha256.is_empty());
assert!(!file_info.path.is_empty());
// Test content retrieval with StorageManager
let retrieved_content = file_info
.get_content_with_storage(storage.storage())
.await
.expect("Failed to get file content with StorageManager");
assert_eq!(retrieved_content.as_ref(), content);
// Test file deletion with StorageManager
FileInfo::delete_by_id_with_storage(&file_info.id, &db, storage.storage())
.await
.expect("Failed to delete file with StorageManager");
// Verify file is deleted
let deleted_content_result = file_info.get_content_with_storage(storage.storage()).await;
assert!(deleted_content_result.is_err());
}
#[tokio::test]
async fn test_file_info_storage_manager_persistence() {
// Setup
let db = SurrealDbClient::memory("test_ns", "test_file_persistence")
.await
.unwrap();
db.apply_migrations().await.unwrap();
let content = b"Test content for persistence";
let field_data = create_test_file(content, "persistence_test.txt");
let user_id = "test_user";
// Create test storage manager
let storage = store::testing::TestStorageManager::new_memory()
.await
.unwrap();
// Create file
let file_info = FileInfo::new_with_storage(field_data, &db, user_id, storage.storage())
.await
.expect("Failed to create file");
// Test that data persists across multiple operations with the same StorageManager
let retrieved_content_1 = file_info
.get_content_with_storage(storage.storage())
.await
.unwrap();
let retrieved_content_2 = file_info
.get_content_with_storage(storage.storage())
.await
.unwrap();
assert_eq!(retrieved_content_1.as_ref(), content);
assert_eq!(retrieved_content_2.as_ref(), content);
// Test that different StorageManager instances don't share data (memory storage isolation)
let storage2 = store::testing::TestStorageManager::new_memory()
.await
.unwrap();
let isolated_content_result = file_info.get_content_with_storage(storage2.storage()).await;
assert!(
isolated_content_result.is_err(),
"Different StorageManager should not have access to same data"
);
}
#[tokio::test]
async fn test_file_info_storage_manager_equivalence() {
// Setup
let db = SurrealDbClient::memory("test_ns", "test_file_equivalence")
.await
.unwrap();
db.apply_migrations().await.unwrap();
let content = b"Test content for equivalence testing";
let field_data1 = create_test_file(content, "equivalence_test_1.txt");
let field_data2 = create_test_file(content, "equivalence_test_2.txt");
let user_id = "test_user";
// Create single storage manager and reuse it
let storage_manager = store::testing::TestStorageManager::new_memory()
.await
.unwrap();
let storage = storage_manager.storage();
// Create multiple files with the same storage manager
let file_info_1 = FileInfo::new_with_storage(field_data1, &db, user_id, &storage)
.await
.expect("Failed to create file 1");
let file_info_2 = FileInfo::new_with_storage(field_data2, &db, user_id, &storage)
.await
.expect("Failed to create file 2");
// Test that both files can be retrieved with the same storage backend
let content_1 = file_info_1
.get_content_with_storage(&storage)
.await
.unwrap();
let content_2 = file_info_2
.get_content_with_storage(&storage)
.await
.unwrap();
assert_eq!(content_1.as_ref(), content);
assert_eq!(content_2.as_ref(), content);
// Test that files can be deleted with the same storage manager
FileInfo::delete_by_id_with_storage(&file_info_1.id, &db, &storage)
.await
.unwrap();
FileInfo::delete_by_id_with_storage(&file_info_2.id, &db, &storage)
.await
.unwrap();
// Verify files are deleted
let deleted_content_1 = file_info_1.get_content_with_storage(&storage).await;
let deleted_content_2 = file_info_2.get_content_with_storage(&storage).await;
assert!(deleted_content_1.is_err());
assert!(deleted_content_2.is_err());
}
}
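A sketch of the storage surface these tests assume, inferred from calls made elsewhere in this changeset (signatures are approximations, not the actual definitions):
// StorageManager::new(&AppConfig)                      -> builds the configured backend (local or memory)
// StorageManager::put(&self, location, Bytes)          -> write an object
// StorageManager::get(&self, location) -> Bytes        -> read an object
// StorageManager::get_stream(&self, location)          -> stream an object (used by serve_file)
// StorageManager::resolve_local_path(&self, location)  -> Option<PathBuf>, Some only for local backends
// TestStorageManager::{new_memory, new_local}          -> test helpers exposing a StorageManager via .storage()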

View File

@@ -459,4 +459,44 @@ mod tests {
let retrieved: Option<Scratchpad> = db.get_item(&scratchpad_id).await.unwrap();
assert!(retrieved.is_some());
}
#[tokio::test]
async fn test_timezone_aware_scratchpad_conversion() {
let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string())
.await
.expect("Failed to create test database");
db.apply_migrations()
.await
.expect("Failed to apply migrations");
let user_id = "test_user_123";
let scratchpad =
Scratchpad::new(user_id.to_string(), "Test Timezone Scratchpad".to_string());
let scratchpad_id = scratchpad.id.clone();
db.store_item(scratchpad).await.unwrap();
let retrieved = Scratchpad::get_by_id(&scratchpad_id, user_id, &db)
.await
.unwrap();
// Test that datetime fields are preserved and can be used for timezone formatting
assert!(retrieved.created_at.timestamp() > 0);
assert!(retrieved.updated_at.timestamp() > 0);
assert!(retrieved.last_saved_at.timestamp() > 0);
// Test that optional datetime fields work correctly
assert!(retrieved.archived_at.is_none());
assert!(retrieved.ingested_at.is_none());
// Archive the scratchpad to test optional datetime handling
let archived = Scratchpad::archive(&scratchpad_id, user_id, &db, false)
.await
.unwrap();
assert!(archived.archived_at.is_some());
assert!(archived.archived_at.unwrap().timestamp() > 0);
assert!(archived.ingested_at.is_none());
}
}

View File

@@ -2,10 +2,11 @@ use config::{Config, ConfigError, Environment, File};
use serde::Deserialize;
use std::env;
#[derive(Clone, Deserialize, Debug)]
#[derive(Clone, Deserialize, Debug, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum StorageKind {
Local,
Memory,
}
fn default_storage_kind() -> StorageKind {
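Since StorageKind now derives PartialEq and deserializes with rename_all = "lowercase", the accepted spellings can be sanity-checked directly; a hypothetical snippet, assuming serde_json is available as a dev-dependency:
let kind: StorageKind = serde_json::from_str("\"memory\"").unwrap();
assert_eq!(kind, StorageKind::Memory); // "local" maps to StorageKind::Local the same way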

View File

@@ -26,6 +26,7 @@
src = ./.;
filter = let
extraPaths = [
(toString ./Cargo.lock)
(toString ./common/migrations)
(toString ./common/schemas)
(toString ./html-router/templates)

View File

@@ -419,6 +419,10 @@
document.addEventListener('DOMContentLoaded', () => tryRender(document));
// HTMX partial swaps
document.body.addEventListener('knowledge-graph-refresh', () => {
tryRender(document);
});
document.body.addEventListener('htmx:afterSettle', (evt) => {
tryRender(evt && evt.target ? evt.target : document);
});

File diff suppressed because one or more lines are too long

View File

@@ -1,4 +1,4 @@
use common::storage::db::SurrealDbClient;
use common::storage::{db::SurrealDbClient, store::StorageManager};
use common::utils::template_engine::{ProvidesTemplateEngine, TemplateEngine};
use common::{create_template_engine, storage::db::ProvidesDb, utils::config::AppConfig};
use composite_retrieval::reranking::RerankerPool;
@@ -14,14 +14,16 @@ pub struct HtmlState {
pub templates: Arc<TemplateEngine>,
pub session_store: Arc<SessionStoreType>,
pub config: AppConfig,
pub storage: StorageManager,
pub reranker_pool: Option<Arc<RerankerPool>>,
}
impl HtmlState {
pub fn new_with_resources(
pub async fn new_with_resources(
db: Arc<SurrealDbClient>,
openai_client: Arc<OpenAIClientType>,
session_store: Arc<SessionStoreType>,
storage: StorageManager,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
) -> Result<Self, Box<dyn std::error::Error>> {
@@ -34,6 +36,7 @@ impl HtmlState {
session_store,
templates: Arc::new(template_engine),
config,
storage,
reranker_pool,
})
}

View File

@@ -1,5 +1,9 @@
use async_openai::types::ListModelResponse;
use axum::{extract::State, response::IntoResponse, Form};
use axum::{
extract::{Query, State},
response::IntoResponse,
Form,
};
use serde::{Deserialize, Serialize};
use common::{
@@ -31,44 +35,83 @@ use crate::{
pub struct AdminPanelData {
user: User,
settings: SystemSettings,
analytics: Analytics,
users: i64,
analytics: Option<Analytics>,
users: Option<i64>,
default_query_prompt: String,
default_image_prompt: String,
conversation_archive: Vec<Conversation>,
available_models: ListModelResponse,
available_models: Option<ListModelResponse>,
current_section: AdminSection,
}
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AdminSection {
Overview,
Models,
}
impl Default for AdminSection {
fn default() -> Self {
Self::Overview
}
}
#[derive(Deserialize)]
pub struct AdminPanelQuery {
section: Option<String>,
}
pub async fn show_admin_panel(
State(state): State<HtmlState>,
RequireUser(user): RequireUser,
Query(query): Query<AdminPanelQuery>,
) -> Result<impl IntoResponse, HtmlError> {
let (
settings_res,
analytics_res,
user_count_res,
conversation_archive_res,
available_models_res,
) = tokio::join!(
let section = match query.section.as_deref() {
Some("models") => AdminSection::Models,
_ => AdminSection::Overview,
};
let (settings, conversation_archive) = tokio::try_join!(
SystemSettings::get_current(&state.db),
Analytics::get_current(&state.db),
Analytics::get_users_amount(&state.db),
User::get_user_conversations(&user.id, &state.db),
async { state.openai_client.models().list().await }
);
User::get_user_conversations(&user.id, &state.db)
)?;
let (analytics, users) = if section == AdminSection::Overview {
let (analytics, users) = tokio::try_join!(
Analytics::get_current(&state.db),
Analytics::get_users_amount(&state.db)
)?;
(Some(analytics), Some(users))
} else {
(None, None)
};
let available_models = if section == AdminSection::Models {
Some(
state
.openai_client
.models()
.list()
.await
.map_err(|e| AppError::InternalError(e.to_string()))?,
)
} else {
None
};
Ok(TemplateResponse::new_template(
"admin/base.html",
AdminPanelData {
user,
settings: settings_res?,
analytics: analytics_res?,
available_models: available_models_res
.map_err(|e| AppError::InternalError(e.to_string()))?,
users: user_count_res?,
settings,
analytics,
available_models,
users,
default_query_prompt: DEFAULT_QUERY_SYSTEM_PROMPT.to_string(),
default_image_prompt: DEFAULT_IMAGE_PROCESSING_PROMPT.to_string(),
conversation_archive: conversation_archive_res?,
conversation_archive,
current_section: section,
},
))
}
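Net effect: the provider round-trip behind available_models happens only for /admin?section=models, while /admin?section=overview (the default) fetches analytics and the user count instead; settings and the conversation archive are loaded for both sections, so neither tab waits on the other's slow path.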
@@ -115,7 +158,7 @@ pub async fn toggle_registration_status(
SystemSettings::update(&state.db, new_settings.clone()).await?;
Ok(TemplateResponse::new_partial(
"admin/base.html",
"admin/sections/overview.html",
"registration_status_input",
RegistrationToggleData {
settings: new_settings,
@@ -217,7 +260,7 @@ pub async fn update_model_settings(
.map_err(|_e| AppError::InternalError("Failed to get models".to_string()))?;
Ok(TemplateResponse::new_partial(
"admin/base.html",
"admin/sections/models.html",
"model_settings_form",
ModelSettingsData {
settings: new_settings,
@@ -282,7 +325,7 @@ pub async fn patch_query_prompt(
SystemSettings::update(&state.db, new_settings.clone()).await?;
Ok(TemplateResponse::new_partial(
"admin/base.html",
"admin/sections/overview.html",
"system_prompt_section",
SystemPromptSectionData {
settings: new_settings,
@@ -341,7 +384,7 @@ pub async fn patch_ingestion_prompt(
SystemSettings::update(&state.db, new_settings.clone()).await?;
Ok(TemplateResponse::new_partial(
"admin/base.html",
"admin/sections/overview.html",
"system_prompt_section",
SystemPromptSectionData {
settings: new_settings,
@@ -400,7 +443,7 @@ pub async fn patch_image_prompt(
SystemSettings::update(&state.db, new_settings.clone()).await?;
Ok(TemplateResponse::new_partial(
"admin/base.html",
"admin/sections/overview.html",
"system_prompt_section",
SystemPromptSectionData {
settings: new_settings,

View File

@@ -190,7 +190,7 @@ pub async fn delete_text_content(
TextContent::has_other_with_file(&file_info.id, &text_content.id, &state.db).await?;
if !file_in_use {
FileInfo::delete_by_id(&file_info.id, &state.db, &state.config).await?;
FileInfo::delete_by_id_with_storage(&file_info.id, &state.db, &state.storage).await?;
}
}

View File

@@ -17,7 +17,6 @@ use crate::{
utils::text_content_preview::truncate_text_contents,
AuthSessionType,
};
use common::storage::store;
use common::storage::types::user::DashboardStats;
use common::{
error::AppError,
@@ -86,7 +85,7 @@ pub async fn delete_text_content(
TextContent::has_other_with_file(&file_info.id, &text_content.id, &state.db).await?;
if !file_in_use {
FileInfo::delete_by_id(&file_info.id, &state.db, &state.config).await?;
FileInfo::delete_by_id_with_storage(&file_info.id, &state.db, &state.storage).await?;
}
}
@@ -278,7 +277,7 @@ pub async fn serve_file(
return Ok(TemplateResponse::unauthorized().into_response());
}
let stream = match store::get_stream_at(&file_info.path, &state.config).await {
let stream = match state.storage.get_stream(&file_info.path).await {
Ok(s) => s,
Err(_) => return Ok(TemplateResponse::server_error().into_response()),
};

View File

@@ -98,7 +98,8 @@ pub async fn process_ingress_form(
info!("{:?}", input);
let file_infos = try_join_all(input.files.into_iter().map(|file| {
FileInfo::new(file, &state.db, &user.id, &state.config).map_err(AppError::from)
FileInfo::new_with_storage(file, &state.db, &user.id, &state.storage)
.map_err(AppError::from)
}))
.await?;

View File

@@ -4,10 +4,11 @@ use std::fmt;
use axum::{
extract::{Path, Query, State},
response::IntoResponse,
http::HeaderValue,
response::{IntoResponse, Response},
Form, Json,
};
use axum_htmx::{HxBoosted, HxRequest};
use axum_htmx::{HxBoosted, HxRequest, HX_TRIGGER};
use serde::{
de::{self, Deserializer, MapAccess, Visitor},
Deserialize, Serialize,
@@ -38,10 +39,96 @@ use crate::{
use url::form_urlencoded;
const KNOWLEDGE_ENTITIES_PER_PAGE: usize = 12;
const DEFAULT_RELATIONSHIP_TYPE: &str = "relates_to";
const RELATIONSHIP_TYPE_OPTIONS: &[&str] = &["RelatedTo", "RelevantTo", "SimilarTo", "References"];
const DEFAULT_RELATIONSHIP_TYPE: &str = RELATIONSHIP_TYPE_OPTIONS[0];
const MAX_RELATIONSHIP_SUGGESTIONS: usize = 10;
const SUGGESTION_MIN_SCORE: f32 = 0.5;
const GRAPH_REFRESH_TRIGGER: &str = r#"{"knowledge-graph-refresh":true}"#;
const RELATIONSHIP_TYPE_ALIASES: &[(&str, &str)] = &[("relatesto", "RelatedTo")];
fn relationship_type_or_default(value: Option<&str>) -> String {
match value {
Some(raw) => canonicalize_relationship_type(raw),
None => DEFAULT_RELATIONSHIP_TYPE.to_string(),
}
}
fn canonicalize_relationship_type(value: &str) -> String {
let trimmed = value.trim();
if trimmed.is_empty() {
return DEFAULT_RELATIONSHIP_TYPE.to_string();
}
let key: String = trimmed
.chars()
.filter(|c| c.is_ascii_alphanumeric())
.flat_map(|c| c.to_lowercase())
.collect();
for option in RELATIONSHIP_TYPE_OPTIONS {
let option_key: String = option
.chars()
.filter(|c| c.is_ascii_alphanumeric())
.flat_map(|c| c.to_lowercase())
.collect();
if option_key == key {
return (*option).to_string();
}
}
for (alias, target) in RELATIONSHIP_TYPE_ALIASES {
if *alias == key {
return (*target).to_string();
}
}
let mut result = String::new();
for segment in trimmed
.split(|c: char| !c.is_ascii_alphanumeric())
.filter(|segment| !segment.is_empty())
{
let mut chars = segment.chars();
if let Some(first) = chars.next() {
result.extend(first.to_uppercase());
for ch in chars {
result.extend(ch.to_lowercase());
}
}
}
if result.is_empty() {
trimmed.to_string()
} else {
result
}
}
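Illustrative expectations for the canonicalizer (hypothetical assertions, not part of this diff):
assert_eq!(canonicalize_relationship_type("relates_to"), "RelatedTo"); // legacy default, via the "relatesto" alias
assert_eq!(canonicalize_relationship_type(" similar to "), "SimilarTo"); // separator/case-insensitive option match
assert_eq!(canonicalize_relationship_type("points-at"), "PointsAt"); // unknown types fall back to PascalCase
assert_eq!(canonicalize_relationship_type(""), "RelatedTo"); // blank input yields the default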
fn collect_relationship_type_options(relationships: &[KnowledgeRelationship]) -> Vec<String> {
let mut options: HashSet<String> = RELATIONSHIP_TYPE_OPTIONS
.iter()
.map(|value| (*value).to_string())
.collect();
for relationship in relationships {
options.insert(canonicalize_relationship_type(
&relationship.metadata.relationship_type,
));
}
let mut options: Vec<String> = options.into_iter().collect();
options.sort();
options
}
fn respond_with_graph_refresh(response: TemplateResponse) -> Response {
let mut response = response.into_response();
if let Ok(value) = HeaderValue::from_str(GRAPH_REFRESH_TRIGGER) {
response.headers_mut().insert(HX_TRIGGER, value);
}
response
}
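Every mutating handler below funnels its response through this helper, so each one carries the header HX-Trigger: {"knowledge-graph-refresh":true}. That pairs with the 'knowledge-graph-refresh' listener added to the graph script earlier in this changeset, which re-renders the graph after the swap settles.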
#[derive(Deserialize, Default)]
pub struct FilterParams {
entity_type: Option<String>,
@@ -59,6 +146,8 @@ pub async fn show_new_knowledge_entity_form(
.collect();
let existing_entities = User::get_knowledge_entities(&user.id, &state.db).await?;
let relationships = User::get_knowledge_relationships(&user.id, &state.db).await?;
let relationship_type_options = collect_relationship_type_options(&relationships);
let empty_selected: HashSet<String> = HashSet::new();
let empty_scores: HashMap<String, f32> = HashMap::new();
let relationship_options =
@@ -70,9 +159,10 @@ pub async fn show_new_knowledge_entity_form(
entity_types,
relationship_list: RelationshipListData {
relationship_options,
relationship_type: DEFAULT_RELATIONSHIP_TYPE.to_string(),
relationship_type: relationship_type_or_default(None),
suggestion_count: 0,
},
relationship_type_options,
},
))
}
@@ -107,13 +197,7 @@ pub async fn create_knowledge_entity(
state.db.store_item(new_entity.clone()).await?;
let relationship_type = form
.relationship_type
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
.unwrap_or(DEFAULT_RELATIONSHIP_TYPE)
.to_string();
let relationship_type = relationship_type_or_default(form.relationship_type.as_deref());
debug!("form: {:?}", form);
if !form.relationship_ids.is_empty() {
@@ -148,11 +232,11 @@ pub async fn create_knowledge_entity(
let default_params = FilterParams::default();
let kb_data = build_knowledge_base_data(&state, &user, &default_params).await?;
Ok(TemplateResponse::new_partial(
Ok(respond_with_graph_refresh(TemplateResponse::new_partial(
"knowledge/base.html",
"main",
kb_data,
))
)))
}
pub async fn suggest_knowledge_relationships(
@@ -225,13 +309,7 @@ pub async fn suggest_knowledge_relationships(
}
}
let relationship_type = form
.relationship_type
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
.unwrap_or(DEFAULT_RELATIONSHIP_TYPE)
.to_string();
let relationship_type = relationship_type_or_default(form.relationship_type.as_deref());
let entities: Vec<KnowledgeEntity> = entity_lookup.into_values().collect();
let relationship_options =
@@ -251,7 +329,7 @@ pub async fn suggest_knowledge_relationships(
pub struct KnowledgeBaseData {
entities: Vec<KnowledgeEntity>,
visible_entities: Vec<KnowledgeEntity>,
relationships: Vec<KnowledgeRelationship>,
relationships: Vec<RelationshipTableRow>,
user: User,
entity_types: Vec<String>,
content_categories: Vec<String>,
@@ -260,6 +338,8 @@ pub struct KnowledgeBaseData {
conversation_archive: Vec<Conversation>,
pagination: Pagination,
page_query: String,
relationship_type_options: Vec<String>,
default_relationship_type: String,
}
#[derive(Serialize)]
@@ -270,6 +350,12 @@ pub struct RelationshipOption {
score: Option<f32>,
}
#[derive(Serialize)]
pub struct RelationshipTableRow {
relationship: KnowledgeRelationship,
relationship_type_label: String,
}
fn build_relationship_options(
entities: Vec<KnowledgeEntity>,
selected_ids: &HashSet<String>,
@@ -309,6 +395,40 @@ fn build_relationship_options(
options
}
fn build_relationship_table_data(
entities: Vec<KnowledgeEntity>,
relationships: Vec<KnowledgeRelationship>,
) -> RelationshipTableData {
let relationship_type_options = collect_relationship_type_options(&relationships);
let mut frequency: HashMap<String, usize> = HashMap::new();
let relationships = relationships
.into_iter()
.map(|relationship| {
let relationship_type_label =
canonicalize_relationship_type(&relationship.metadata.relationship_type);
*frequency
.entry(relationship_type_label.clone())
.or_insert(0) += 1;
RelationshipTableRow {
relationship,
relationship_type_label,
}
})
.collect();
let default_relationship_type = frequency
.into_iter()
.max_by_key(|(_, count)| *count)
.map(|(label, _)| label)
.unwrap_or_else(|| DEFAULT_RELATIONSHIP_TYPE.to_string());
RelationshipTableData {
entities,
relationships,
relationship_type_options,
default_relationship_type,
}
}
async fn build_knowledge_base_data(
state: &HtmlState,
user: &User,
@@ -348,10 +468,16 @@ async fn build_knowledge_base_data(
let relationships = User::get_knowledge_relationships(&user.id, &state.db).await?;
let entity_id_set: HashSet<String> = entities.iter().map(|e| e.id.clone()).collect();
let relationships: Vec<KnowledgeRelationship> = relationships
let filtered_relationships: Vec<KnowledgeRelationship> = relationships
.into_iter()
.filter(|rel| entity_id_set.contains(&rel.in_) && entity_id_set.contains(&rel.out))
.collect();
let RelationshipTableData {
entities: _,
relationships,
relationship_type_options,
default_relationship_type,
} = build_relationship_table_data(entities.clone(), filtered_relationships);
let conversation_archive = User::get_user_conversations(&user.id, &state.db).await?;
Ok(KnowledgeBaseData {
@@ -366,6 +492,8 @@ async fn build_knowledge_base_data(
conversation_archive,
pagination,
page_query,
relationship_type_options,
default_relationship_type,
})
}
@@ -380,6 +508,7 @@ pub struct RelationshipListData {
pub struct NewEntityModalData {
entity_types: Vec<String>,
relationship_list: RelationshipListData,
relationship_type_options: Vec<String>,
}
#[derive(Debug)]
@@ -679,7 +808,7 @@ pub async fn get_knowledge_graph_json(
links.push(GraphLink {
source: rel.out.clone(),
target: rel.in_.clone(),
relationship_type: rel.metadata.relationship_type.clone(),
relationship_type: canonicalize_relationship_type(&rel.metadata.relationship_type),
});
}
}
@@ -806,7 +935,7 @@ pub async fn patch_knowledge_entity(
let content_categories = User::get_user_categories(&user.id, &state.db).await?;
// Render updated list
Ok(TemplateResponse::new_template(
Ok(respond_with_graph_refresh(TemplateResponse::new_template(
"knowledge/entity_list.html",
EntityListData {
visible_entities,
@@ -818,7 +947,7 @@ pub async fn patch_knowledge_entity(
selected_content_category: None,
page_query: String::new(),
},
))
)))
}
pub async fn delete_knowledge_entity(
@@ -845,7 +974,7 @@ pub async fn delete_knowledge_entity(
// Get content categories
let content_categories = User::get_user_categories(&user.id, &state.db).await?;
Ok(TemplateResponse::new_template(
Ok(respond_with_graph_refresh(TemplateResponse::new_template(
"knowledge/entity_list.html",
EntityListData {
visible_entities,
@@ -857,13 +986,15 @@ pub async fn delete_knowledge_entity(
selected_content_category: None,
page_query: String::new(),
},
))
)))
}
#[derive(Serialize)]
pub struct RelationshipTableData {
entities: Vec<KnowledgeEntity>,
relationships: Vec<KnowledgeRelationship>,
relationships: Vec<RelationshipTableRow>,
relationship_type_options: Vec<String>,
default_relationship_type: String,
}
pub async fn delete_knowledge_relationship(
@@ -876,15 +1007,13 @@ pub async fn delete_knowledge_relationship(
let entities = User::get_knowledge_entities(&user.id, &state.db).await?;
let relationships = User::get_knowledge_relationships(&user.id, &state.db).await?;
let table_data = build_relationship_table_data(entities, relationships);
// Render updated list
Ok(TemplateResponse::new_template(
Ok(respond_with_graph_refresh(TemplateResponse::new_template(
"knowledge/relationship_table.html",
RelationshipTableData {
entities,
relationships,
},
))
table_data,
)))
}
#[derive(Deserialize)]
@@ -900,12 +1029,13 @@ pub async fn save_knowledge_relationship(
Form(form): Form<SaveKnowledgeRelationshipInput>,
) -> Result<impl IntoResponse, HtmlError> {
// Construct relationship
let relationship_type = canonicalize_relationship_type(&form.relationship_type);
let relationship = KnowledgeRelationship::new(
form.in_,
form.out,
user.id.clone(),
"manual".into(),
form.relationship_type,
relationship_type,
);
relationship.store_relationship(&state.db).await?;
@@ -913,13 +1043,11 @@ pub async fn save_knowledge_relationship(
let entities = User::get_knowledge_entities(&user.id, &state.db).await?;
let relationships = User::get_knowledge_relationships(&user.id, &state.db).await?;
let table_data = build_relationship_table_data(entities, relationships);
// Render updated list
Ok(TemplateResponse::new_template(
Ok(respond_with_graph_refresh(TemplateResponse::new_template(
"knowledge/relationship_table.html",
RelationshipTableData {
entities,
relationships,
},
))
table_data,
)))
}

View File

@@ -5,6 +5,7 @@ use axum::{
Form,
};
use axum_htmx::{HxBoosted, HxRequest, HX_TRIGGER};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::html_state::HtmlState;
@@ -32,7 +33,7 @@ pub struct ScratchpadListItem {
id: String,
title: String,
content: String,
last_saved_at: String,
last_saved_at: DateTime<Utc>,
}
#[derive(Serialize)]
@@ -46,9 +47,9 @@ pub struct ScratchpadDetailData {
pub struct ScratchpadArchiveItem {
id: String,
title: String,
archived_at: String,
archived_at: Option<DateTime<Utc>>,
#[serde(skip_serializing_if = "Option::is_none")]
ingested_at: Option<String>,
ingested_at: Option<DateTime<Utc>>,
}
#[derive(Serialize)]
@@ -56,9 +57,9 @@ pub struct ScratchpadDetail {
id: String,
title: String,
content: String,
created_at: String,
updated_at: String,
last_saved_at: String,
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
last_saved_at: DateTime<Utc>,
is_dirty: bool,
}
@@ -75,7 +76,7 @@ impl From<&Scratchpad> for ScratchpadListItem {
id: value.id.clone(),
title: value.title.clone(),
content: value.content.clone(),
last_saved_at: value.last_saved_at.format("%Y-%m-%d %H:%M").to_string(),
last_saved_at: value.last_saved_at,
}
}
}
@@ -85,13 +86,8 @@ impl From<&Scratchpad> for ScratchpadArchiveItem {
Self {
id: value.id.clone(),
title: value.title.clone(),
archived_at: value
.archived_at
.map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
.unwrap_or_else(|| "Unknown".to_string()),
ingested_at: value
.ingested_at
.map(|dt| dt.format("%Y-%m-%d %H:%M").to_string()),
archived_at: value.archived_at,
ingested_at: value.ingested_at,
}
}
}
@@ -102,9 +98,9 @@ impl From<&Scratchpad> for ScratchpadDetail {
id: value.id.clone(),
title: value.title.clone(),
content: value.content.clone(),
created_at: value.created_at.format("%Y-%m-%d %H:%M:%S").to_string(),
updated_at: value.updated_at.format("%Y-%m-%d %H:%M:%S").to_string(),
last_saved_at: value.last_saved_at.format("%Y-%m-%d %H:%M:%S").to_string(),
created_at: value.created_at,
updated_at: value.updated_at,
last_saved_at: value.last_saved_at,
is_dirty: value.is_dirty,
}
}
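With the view models now carrying DateTime<Utc> instead of preformatted strings, rendering moves into the templates via the datetimeformat filter used later in this changeset. A minimal sketch of the wiring, assuming the template engine is minijinja with minijinja-contrib's datetime filters enabled (the actual TemplateEngine setup is not shown in this diff):
let mut env = minijinja::Environment::new();
env.add_filter("datetimeformat", minijinja_contrib::filters::datetimeformat);
// templates can then call: {{ value | datetimeformat(format="short", tz=user.timezone) }}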
@@ -391,6 +387,126 @@ pub async fn ingest_scratchpad(
Ok(response)
}
pub async fn archive_scratchpad(
RequireUser(user): RequireUser,
State(state): State<HtmlState>,
Path(scratchpad_id): Path<String>,
) -> Result<impl IntoResponse, HtmlError> {
Scratchpad::archive(&scratchpad_id, &user.id, &state.db, false).await?;
let scratchpads = Scratchpad::get_by_user(&user.id, &state.db).await?;
let archived_scratchpads = Scratchpad::get_archived_by_user(&user.id, &state.db).await?;
let conversation_archive = User::get_user_conversations(&user.id, &state.db).await?;
let scratchpad_list: Vec<ScratchpadListItem> =
scratchpads.iter().map(ScratchpadListItem::from).collect();
let archived_list: Vec<ScratchpadArchiveItem> = archived_scratchpads
.iter()
.map(ScratchpadArchiveItem::from)
.collect();
Ok(TemplateResponse::new_template(
"scratchpad/base.html",
ScratchpadPageData {
user,
scratchpads: scratchpad_list,
archived_scratchpads: archived_list,
conversation_archive,
new_scratchpad: None,
},
))
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
#[test]
fn test_scratchpad_list_item_conversion() {
// Create a test scratchpad with datetime values
let now = Utc::now();
let mut scratchpad = common::storage::types::scratchpad::Scratchpad::new(
"test_user".to_string(),
"Test Scratchpad".to_string(),
);
// Override the timestamps with known values for testing
scratchpad.last_saved_at = now;
// Test conversion to ScratchpadListItem
let list_item = ScratchpadListItem::from(&scratchpad);
assert_eq!(list_item.id, scratchpad.id);
assert_eq!(list_item.title, scratchpad.title);
assert_eq!(list_item.content, scratchpad.content);
assert_eq!(list_item.last_saved_at, scratchpad.last_saved_at);
}
#[test]
fn test_scratchpad_detail_conversion() {
// Create a test scratchpad with datetime values
let now = Utc::now();
let mut scratchpad = common::storage::types::scratchpad::Scratchpad::new(
"test_user".to_string(),
"Test Scratchpad".to_string(),
);
// Override the timestamps with known values for testing
scratchpad.last_saved_at = now;
// Test conversion to ScratchpadDetail
let detail = ScratchpadDetail::from(&scratchpad);
assert_eq!(detail.id, scratchpad.id);
assert_eq!(detail.title, scratchpad.title);
assert_eq!(detail.content, scratchpad.content);
assert_eq!(detail.created_at, scratchpad.created_at);
assert_eq!(detail.updated_at, scratchpad.updated_at);
assert_eq!(detail.last_saved_at, scratchpad.last_saved_at);
assert_eq!(detail.is_dirty, scratchpad.is_dirty);
}
#[test]
fn test_scratchpad_archive_item_conversion() {
// Create a test scratchpad with optional datetime values
let now = Utc::now();
let mut scratchpad = common::storage::types::scratchpad::Scratchpad::new(
"test_user".to_string(),
"Test Scratchpad".to_string(),
);
// Set optional datetime fields
scratchpad.archived_at = Some(now);
scratchpad.ingested_at = Some(now);
// Test conversion to ScratchpadArchiveItem
let archive_item = ScratchpadArchiveItem::from(&scratchpad);
assert_eq!(archive_item.id, scratchpad.id);
assert_eq!(archive_item.title, scratchpad.title);
assert_eq!(archive_item.archived_at, scratchpad.archived_at);
assert_eq!(archive_item.ingested_at, scratchpad.ingested_at);
}
#[test]
fn test_scratchpad_archive_item_conversion_with_none_values() {
// Create a test scratchpad without optional datetime values
let scratchpad = common::storage::types::scratchpad::Scratchpad::new(
"test_user".to_string(),
"Test Scratchpad".to_string(),
);
// Test conversion to ScratchpadArchiveItem
let archive_item = ScratchpadArchiveItem::from(&scratchpad);
assert_eq!(archive_item.id, scratchpad.id);
assert_eq!(archive_item.title, scratchpad.title);
assert_eq!(archive_item.archived_at, None);
assert_eq!(archive_item.ingested_at, None);
}
}
pub async fn restore_scratchpad(
RequireUser(user): RequireUser,
State(state): State<HtmlState>,
@@ -439,52 +555,3 @@ pub async fn restore_scratchpad(
Ok(response)
}
pub async fn archive_scratchpad(
RequireUser(user): RequireUser,
State(state): State<HtmlState>,
Path(scratchpad_id): Path<String>,
) -> Result<impl IntoResponse, HtmlError> {
Scratchpad::archive(&scratchpad_id, &user.id, &state.db, false).await?;
let scratchpads = Scratchpad::get_by_user(&user.id, &state.db).await?;
let archived_scratchpads = Scratchpad::get_archived_by_user(&user.id, &state.db).await?;
let conversation_archive = User::get_user_conversations(&user.id, &state.db).await?;
let scratchpad_list: Vec<ScratchpadListItem> =
scratchpads.iter().map(ScratchpadListItem::from).collect();
let archived_list: Vec<ScratchpadArchiveItem> = archived_scratchpads
.iter()
.map(ScratchpadArchiveItem::from)
.collect();
let trigger_payload = serde_json::json!({
"toast": {
"title": "Scratchpad archived",
"description": "You can find it in the archive drawer below.",
"type": "warning"
}
});
let trigger_value = serde_json::to_string(&trigger_payload).unwrap_or_else(|_| {
r#"{"toast":{"title":"Scratchpad archived","description":"You can find it in the archive drawer below.","type":"warning"}}"#.to_string()
});
let template_response = TemplateResponse::new_partial(
"scratchpad/base.html",
"main",
ScratchpadPageData {
user,
scratchpads: scratchpad_list,
archived_scratchpads: archived_list,
conversation_archive,
new_scratchpad: None,
},
);
let mut response = template_response.into_response();
if let Ok(header_value) = HeaderValue::from_str(&trigger_value) {
response.headers_mut().insert(HX_TRIGGER, header_value);
}
Ok(response)
}

View File

@@ -3,154 +3,49 @@
{% block title %}Minne - Admin{% endblock %}
{% block main %}
<div class="flex justify-center grow mt-2 sm:mt-4 pb-4">
<div class="container">
<section class="mb-4">
<div class="nb-panel p-3 flex items-center justify-between">
<h1 class="text-xl font-extrabold tracking-tight">Admin Dashboard</h1>
<div id="admin-shell" class="flex justify-center grow mt-2 sm:mt-4 pb-4">
<div class="container flex flex-col gap-4">
<section class="nb-panel p-4 sm:p-5 flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between">
<div>
<h1 class="text-xl font-extrabold tracking-tight">Admin Controls</h1>
<p class="text-sm opacity-70 max-w-2xl">
Stay on top of analytics and manage AI integrations without waiting on long-running model calls.
</p>
</div>
<div class="text-xs opacity-60 sm:text-right">
Signed in as <span class="font-medium">{{ user.email }}</span>
</div>
</section>
<section class="mb-4">
<div class="grid grid-cols-1 sm:grid-cols-3 gap-4">
<div class="nb-stat">
<div class="text-xs opacity-70">Page Loads</div>
<div class="text-3xl font-extrabold">{{analytics.page_loads}}</div>
<div class="text-xs opacity-60">Total page load events</div>
</div>
<div class="nb-stat">
<div class="text-xs opacity-70">Unique Visitors</div>
<div class="text-3xl font-extrabold">{{analytics.visitors}}</div>
<div class="text-xs opacity-60">Distinct users by fingerprint</div>
</div>
<div class="nb-stat">
<div class="text-xs opacity-70">Users</div>
<div class="text-3xl font-extrabold">{{users}}</div>
<div class="text-xs opacity-60">Registered accounts</div>
</div>
</div>
</section>
<nav
class="nb-panel p-2 flex flex-wrap gap-2 text-sm"
hx-boost="true"
hx-target="#admin-shell"
hx-select="#admin-shell"
hx-swap="outerHTML"
hx-push-url="true"
>
<a
href="/admin?section=overview"
class="nb-btn btn-sm px-4 {% if current_section == 'overview' %}nb-cta{% else %}btn-ghost{% endif %}"
>
Overview
</a>
<a
href="/admin?section=models"
class="nb-btn btn-sm px-4 {% if current_section == 'models' %}nb-cta{% else %}btn-ghost{% endif %}"
>
Models
</a>
</nav>
<section class="grid grid-cols-1 xl:grid-cols-2 gap-4">
{% block system_prompt_section %}
<div id="system_prompt_section" class="nb-panel p-4">
<div class="text-sm font-semibold mb-3">System Prompts</div>
<div class="flex gap-2 flex-col sm:flex-row">
<button type="button" class="nb-btn btn-sm" hx-get="/edit-query-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Query Prompt</button>
<button type="button" class="nb-btn btn-sm" hx-get="/edit-ingestion-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Ingestion Prompt</button>
<button type="button" class="nb-btn btn-sm" hx-get="/edit-image-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Image Prompt</button>
</div>
</div>
{% endblock %}
<div class="nb-panel p-4">
<div class="text-sm font-semibold mb-3">AI Models</div>
{% block model_settings_form %}
<form hx-patch="/update-model-settings" hx-swap="outerHTML" class="grid grid-cols-1 gap-4">
<!-- Query Model -->
<div>
<div class="text-sm opacity-80 mb-1">Query Model</div>
<select name="query_model" class="nb-select w-full">
{% for model in available_models.data %}
<option value="{{model.id}}" {% if settings.query_model==model.id %} selected {% endif %}>{{model.id}}</option>
{% endfor %}
</select>
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{settings.query_model}}</span></p>
</div>
<!-- Processing Model -->
<div>
<div class="text-sm opacity-80 mb-1">Processing Model</div>
<select name="processing_model" class="nb-select w-full">
{% for model in available_models.data %}
<option value="{{model.id}}" {% if settings.processing_model==model.id %} selected {% endif %}>{{model.id}}</option>
{% endfor %}
</select>
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{settings.processing_model}}</span></p>
</div>
<!-- Image Processing Model -->
<div>
<div class="text-sm opacity-80 mb-1">Image Processing Model</div>
<select name="image_processing_model" class="nb-select w-full">
{% for model in available_models.data %}
<option value="{{model.id}}" {% if settings.image_processing_model==model.id %} selected {% endif %}>{{model.id}}</option>
{% endfor %}
</select>
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{settings.image_processing_model}}</span></p>
</div>
<!-- Voice Processing Model -->
<div>
<div class="text-sm opacity-80 mb-1">Voice Processing Model</div>
<select name="voice_processing_model" class="nb-select w-full">
{% for model in available_models.data %}
<option value="{{model.id}}" {% if settings.voice_processing_model==model.id %} selected {% endif %}>{{model.id}}</option>
{% endfor %}
</select>
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{settings.voice_processing_model}}</span></p>
</div>
<!-- Embedding Model -->
<div>
<div class="text-sm opacity-80 mb-1">Embedding Model</div>
<select name="embedding_model" class="nb-select w-full">
{% for model in available_models.data %}
<option value="{{model.id}}" {% if settings.embedding_model==model.id %} selected {% endif %}>{{model.id}}</option>
{% endfor %}
</select>
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{settings.embedding_model}} ({{settings.embedding_dimensions}} dims)</span></p>
</div>
<!-- Embedding Dimensions -->
<div>
<div class="text-sm opacity-80 mb-1" for="embedding_dimensions">Embedding Dimensions</div>
<input type="number" id="embedding_dimensions" name="embedding_dimensions" class="nb-input w-full" value="{{ settings.embedding_dimensions }}" required />
</div>
<!-- Alert -->
<div id="embedding-change-alert" class="nb-panel p-3 bg-warning/20 hidden">
<div class="text-sm"><strong>Warning:</strong> Changing dimensions will require re-creating all embeddings. Look up your model's required dimensions or use a model that allows specifying them.</div>
</div>
<div class="flex justify-end">
<button type="submit" class="nb-btn nb-cta btn-sm">Save Model Settings</button>
</div>
</form>
<script>
// Rebind after HTMX swaps
(() => {
const dimensionInput = document.getElementById('embedding_dimensions');
const alertElement = document.getElementById('embedding-change-alert');
const initialDimensions = '{{ settings.embedding_dimensions }}';
if (dimensionInput && alertElement) {
dimensionInput.addEventListener('input', (event) => {
if (String(event.target.value) !== String(initialDimensions)) {
alertElement.classList.remove('hidden');
} else {
alertElement.classList.add('hidden');
}
});
}
})();
</script>
{% endblock %}
</div>
<div class="nb-panel p-4">
<div class="text-sm font-semibold mb-3">Registration</div>
<label class="flex items-center gap-3">
{% block registration_status_input %}
<form hx-patch="/toggle-registrations" hx-swap="outerHTML" hx-trigger="change">
<input name="registration_open" type="checkbox" class="nb-checkbox" {% if settings.registrations_enabled %}checked{% endif %} />
</form>
{% endblock %}
<span class="text-sm">Enable Registrations</span>
</label>
<div id="registration-status" class="text-xs opacity-70 mt-2"></div>
</div>
</section>
<div id="admin-content" class="flex flex-col gap-4">
{% if current_section == 'models' %}
{% include 'admin/sections/models.html' %}
{% else %}
{% include 'admin/sections/overview.html' %}
{% endif %}
</div>
</div>
</div>
{% endblock %}

View File

@@ -0,0 +1,130 @@
<section class="nb-panel p-4 sm:p-5 flex flex-col gap-4">
<div class="flex items-start justify-between flex-col sm:flex-row gap-3">
<div>
<div class="text-sm uppercase tracking-wide opacity-60 mb-1">AI Models</div>
<h2 class="text-lg font-semibold">Model configuration</h2>
<p class="text-xs opacity-70 max-w-2xl">
Choose which models power conversational search, ingestion analysis, and embeddings. Adjusting embeddings may trigger a full reprocess.
</p>
</div>
<a
href="/admin?section=overview"
class="nb-btn btn-sm btn-ghost"
hx-boost="true"
hx-target="#admin-shell"
hx-select="#admin-shell"
hx-swap="outerHTML"
hx-push-url="true"
>
← Back to Admin
</a>
</div>
{% if available_models %}
{% block model_settings_form %}
<form hx-patch="/update-model-settings" hx-swap="outerHTML" class="grid grid-cols-1 gap-4">
<div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
<div>
<div class="text-sm opacity-80 mb-1">Query Model</div>
<select name="query_model" class="nb-select w-full">
{% for model in available_models.data %}
<option value="{{ model.id }}" {% if settings.query_model == model.id %}selected{% endif %}>{{ model.id }}</option>
{% endfor %}
</select>
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{ settings.query_model }}</span></p>
</div>
<div>
<div class="text-sm opacity-80 mb-1">Processing Model</div>
<select name="processing_model" class="nb-select w-full">
{% for model in available_models.data %}
<option value="{{ model.id }}" {% if settings.processing_model == model.id %}selected{% endif %}>{{ model.id }}</option>
{% endfor %}
</select>
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{ settings.processing_model }}</span></p>
</div>
</div>
<div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
<div>
<div class="text-sm opacity-80 mb-1">Image Processing Model</div>
<select name="image_processing_model" class="nb-select w-full">
{% for model in available_models.data %}
<option value="{{ model.id }}" {% if settings.image_processing_model == model.id %}selected{% endif %}>{{ model.id }}</option>
{% endfor %}
</select>
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{ settings.image_processing_model }}</span></p>
</div>
<div>
<div class="text-sm opacity-80 mb-1">Voice Processing Model</div>
<select name="voice_processing_model" class="nb-select w-full">
{% for model in available_models.data %}
<option value="{{ model.id }}" {% if settings.voice_processing_model == model.id %}selected{% endif %}>{{ model.id }}</option>
{% endfor %}
</select>
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{ settings.voice_processing_model }}</span></p>
</div>
</div>
<div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
<div>
<div class="text-sm opacity-80 mb-1">Embedding Model</div>
<select name="embedding_model" class="nb-select w-full">
{% for model in available_models.data %}
<option value="{{ model.id }}" {% if settings.embedding_model == model.id %}selected{% endif %}>{{ model.id }}</option>
{% endfor %}
</select>
<p class="text-xs opacity-70 mt-1">Current: <span class="font-mono">{{ settings.embedding_model }}</span></p>
</div>
<div>
<div class="text-sm opacity-80 mb-1" for="embedding_dimensions">Embedding Dimensions</div>
<input
type="number"
id="embedding_dimensions"
name="embedding_dimensions"
class="nb-input w-full"
value="{{ settings.embedding_dimensions }}"
required
min="1"
/>
<p class="text-xs opacity-70 mt-1">Changing dimensions will trigger a background re-embedding.</p>
</div>
</div>
<div id="embedding-change-alert" class="nb-panel p-3 bg-warning/20 hidden">
<div class="text-sm">
<strong>Warning:</strong> Changing dimensions recreates embeddings for all text chunks and knowledge entities. Confirm that the new value matches the target model's expected output dimensions.
</div>
</div>
<div class="flex justify-end gap-2">
<button type="submit" class="nb-btn nb-cta btn-sm">Save Model Settings</button>
</div>
</form>
<script>
(() => {
const dimensionInput = document.getElementById('embedding_dimensions');
const alertElement = document.getElementById('embedding-change-alert');
const initialDimensions = '{{ settings.embedding_dimensions }}';
if (dimensionInput && alertElement) {
dimensionInput.addEventListener('input', (event) => {
if (String(event.target.value) !== String(initialDimensions)) {
alertElement.classList.remove('hidden');
} else {
alertElement.classList.add('hidden');
}
});
}
})();
</script>
{% endblock %}
{% else %}
<div class="nb-panel p-4 bg-warning/10 border border-warning/40">
<div class="text-sm font-semibold mb-1">Unable to load models</div>
<p class="text-xs opacity-70">We could not reach the model provider. Check the API key and retry.</p>
</div>
{% endif %}
</section>

View File

@@ -0,0 +1,57 @@
{% if analytics %}
<section class="grid grid-cols-1 sm:grid-cols-3 gap-4">
<div class="nb-stat">
<div class="text-xs opacity-70">Page Loads</div>
<div class="text-3xl font-extrabold">{{ analytics.page_loads }}</div>
<div class="text-xs opacity-60">Total load events seen by Minne</div>
</div>
<div class="nb-stat">
<div class="text-xs opacity-70">Unique Visitors</div>
<div class="text-3xl font-extrabold">{{ analytics.visitors }}</div>
<div class="text-xs opacity-60">Distinct users by fingerprint</div>
</div>
<div class="nb-stat">
<div class="text-xs opacity-70">Users</div>
<div class="text-3xl font-extrabold">{{ users or 0 }}</div>
<div class="text-xs opacity-60">Registered accounts</div>
</div>
</section>
{% else %}
<section class="nb-panel p-4">
<div class="text-sm font-semibold mb-2">Analytics unavailable</div>
<p class="text-xs opacity-70">We could not fetch analytics for this view. Reload or check the monitoring pipeline.</p>
</section>
{% endif %}
<section class="grid grid-cols-1 xl:grid-cols-2 gap-4">
{% block system_prompt_section %}
<div id="system_prompt_section" class="nb-panel p-4">
<div class="flex items-start justify-between gap-2 mb-3">
<div>
<div class="text-sm font-semibold">System Prompts</div>
<p class="text-xs opacity-70">Adjust the prompts that power retrieval, ingestion analysis, and image processing flows.</p>
</div>
<span class="text-[10px] uppercase tracking-wide opacity-60">LLM</span>
</div>
<div class="flex gap-2 flex-col sm:flex-row">
<button type="button" class="nb-btn btn-sm" hx-get="/edit-query-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Query Prompt</button>
<button type="button" class="nb-btn btn-sm" hx-get="/edit-ingestion-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Ingestion Prompt</button>
<button type="button" class="nb-btn btn-sm" hx-get="/edit-image-prompt" hx-target="#modal" hx-swap="innerHTML">Edit Image Prompt</button>
</div>
</div>
{% endblock %}
<div class="nb-panel p-4">
<div class="text-sm font-semibold mb-2">Registration</div>
<p class="text-xs opacity-60 mb-3">Toggle whether new people can sign up without an invite.</p>
<label class="flex items-center gap-3">
{% block registration_status_input %}
<form hx-patch="/toggle-registrations" hx-swap="outerHTML" hx-trigger="change">
<input name="registration_open" type="checkbox" class="nb-checkbox" {% if settings.registrations_enabled %}checked{% endif %} />
</form>
{% endblock %}
<span class="text-sm">Enable Registrations</span>
</label>
<div id="registration-status" class="text-xs opacity-70 mt-2"></div>
</div>
</section>

View File

@@ -45,8 +45,13 @@ hx-swap="outerHTML"
<label class="flex items-center gap-2">
<span class="text-xs uppercase tracking-wide opacity-70">Type</span>
<input type="text" name="relationship_type" value="{{ relationship_list.relationship_type }}"
class="nb-input w-28" placeholder="relates_to">
class="nb-input w-32" placeholder="RelatedTo" list="relationship-type-options">
</label>
<datalist id="relationship-type-options">
{% for rel_type in relationship_type_options %}
<option value="{{ rel_type }}"></option>
{% endfor %}
</datalist>
<button type="button" class="nb-btn btn-sm nb-cta sm:ml-2" hx-post="/knowledge-entity/suggestions"
hx-target="#relationship-list" hx-swap="outerHTML" hx-include="#modal_form">
Suggest Relationships
@@ -71,4 +76,4 @@ hx-swap="outerHTML"
<button type="submit" class="nb-btn nb-cta">
Create Entity
</button>
{% endblock %}
{% endblock %}

View File

@@ -9,7 +9,8 @@
</tr>
</thead>
<tbody>
{% for relationship in relationships %}
{% for row in relationships %}
{% set relationship = row.relationship %}
<tr>
<!-- Origin column -->
<td>
@@ -30,7 +31,7 @@
{{ relationship.out }}
{% endfor %}
</td>
<td class="uppercase tracking-wide text-xs">{{ relationship.metadata.relationship_type }}</td>
<td class="uppercase tracking-wide text-xs">{{ row.relationship_type_label }}</td>
<td>
<button class="nb-btn btn-xs" hx-delete="/knowledge-relationship/{{ relationship.id }}"
hx-target="#relationship_table_section" hx-swap="outerHTML">
@@ -61,7 +62,7 @@
</td>
<td>
<input id="relationship_type_input" name="relationship_type" type="text" placeholder="RelatedTo"
class="nb-input w-full new_relationship_input" />
class="nb-input w-full new_relationship_input" value="{{ default_relationship_type }}" />
</td>
<td>
<button id="save_relationship_button" type="button" class="nb-btn btn-sm" hx-post="/knowledge-relationship"
@@ -80,4 +81,4 @@
document.getElementById('save_relationship_button').click();
}
});
</script>
</script>

View File

@@ -40,7 +40,7 @@
{{ scratchpad.content[:100] }}{% if scratchpad.content|length > 100 %}...{% endif %}
</div>
<div class="text-xs text-base-content/50">
Last saved: {{ scratchpad.last_saved_at }}
Last saved: {{ scratchpad.last_saved_at | datetimeformat(format="short", tz=user.timezone) }}
</div>
</div>
{% else %}
@@ -76,9 +76,9 @@
<div class="flex items-start justify-between gap-3">
<div class="flex-1 min-w-0">
<h4 class="font-semibold text-base truncate" title="{{ scratchpad.title }}">{{ scratchpad.title }}</h4>
<div class="text-xs text-base-content/50">Archived {{ scratchpad.archived_at }}</div>
<div class="text-xs text-base-content/50">Archived {{ scratchpad.archived_at | datetimeformat(format="short", tz=user.timezone) }}</div>
{% if scratchpad.ingested_at %}
<div class="text-xs text-base-content/40">Ingestion started {{ scratchpad.ingested_at }}</div>
<div class="text-xs text-base-content/40">Ingestion started {{ scratchpad.ingested_at | datetimeformat(format="short", tz=user.timezone) }}</div>
{% endif %}
</div>
<div class="flex items-center gap-2 flex-shrink-0 flex-wrap justify-end">

View File

@@ -25,7 +25,7 @@
<div class="flex flex-col gap-3">
<div class="text-xs text-base-content/50 flex items-center gap-2">
<span>Last saved: <span id="last-saved">{{ scratchpad.last_saved_at }}</span></span>
<span>Last saved: <span id="last-saved">{{ scratchpad.last_saved_at | datetimeformat(format="short", tz=user.timezone) }}</span></span>
<span id="save-status"
class="inline-flex items-center gap-1 text-success opacity-0 transition-opacity duration-300 pointer-events-none">
{% include "icons/check_icon.html" %} <span class="uppercase tracking-wider text-[0.7em]">Saved</span>

View File

@@ -18,7 +18,8 @@ async-openai = { workspace = true }
surrealdb = { workspace = true }
dom_smoothie = { workspace = true }
tempfile = { workspace = true }
axum_typed_multipart = { workspace = true}
axum_typed_multipart = { workspace = true}
anyhow = { workspace = true }
reqwest = { workspace = true }
chrono = { workspace = true }
text-splitter = { workspace = true }
@@ -28,6 +29,7 @@ headless_chrome = { workspace = true }
base64 = { workspace = true }
pdf-extract = "0.9"
lopdf = "0.32"
bytes = { workspace = true }
common = { path = "../common" }
composite-retrieval = { path = "../composite-retrieval" }

View File

@@ -19,6 +19,7 @@ use common::{
error::AppError,
storage::{
db::SurrealDbClient,
store::StorageManager,
types::{
ingestion_payload::IngestionPayload,
ingestion_task::{IngestionTask, TaskErrorInfo},
@@ -47,12 +48,14 @@ impl IngestionPipeline {
openai_client: Arc<Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
storage: StorageManager,
) -> Result<Self, AppError> {
let services = DefaultPipelineServices::new(
db.clone(),
openai_client.clone(),
config.clone(),
reranker_pool,
storage,
);
Self::with_services(db, IngestionConfig::default(), Arc::new(services))

View File

@@ -2,6 +2,7 @@ use common::{
error::AppError,
storage::{
db::SurrealDbClient,
store::StorageManager,
types::{
ingestion_payload::IngestionPayload,
text_content::{TextContent, UrlInfo},
@@ -19,6 +20,7 @@ pub(crate) async fn to_text_content(
db: &SurrealDbClient,
config: &AppConfig,
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
storage: &StorageManager,
) -> Result<TextContent, AppError> {
match ingestion_payload {
IngestionPayload::Url {
@@ -27,7 +29,7 @@ pub(crate) async fn to_text_content(
category,
user_id,
} => {
let (article, file_info) = extract_text_from_url(&url, db, &user_id, config).await?;
let (article, file_info) = extract_text_from_url(&url, db, &user_id, storage).await?;
Ok(TextContent::new(
article.text_content.into(),
Some(context),
@@ -60,7 +62,8 @@ pub(crate) async fn to_text_content(
category,
user_id,
} => {
let text = extract_text_from_file(&file_info, db, openai_client, config).await?;
let text =
extract_text_from_file(&file_info, db, openai_client, config, storage).await?;
Ok(TextContent::new(
text,
Some(context),

View File

@@ -10,6 +10,7 @@ use common::{
error::AppError,
storage::{
db::SurrealDbClient,
store::StorageManager,
types::{
ingestion_payload::IngestionPayload, knowledge_entity::KnowledgeEntity,
knowledge_relationship::KnowledgeRelationship, system_settings::SystemSettings,
@@ -65,6 +66,7 @@ pub struct DefaultPipelineServices {
openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
storage: StorageManager,
}
impl DefaultPipelineServices {
@@ -73,12 +75,14 @@ impl DefaultPipelineServices {
openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
storage: StorageManager,
) -> Self {
Self {
db,
openai_client,
config,
reranker_pool,
storage,
}
}
@@ -144,7 +148,14 @@ impl PipelineServices for DefaultPipelineServices {
&self,
payload: IngestionPayload,
) -> Result<TextContent, AppError> {
to_text_content(payload, &self.db, &self.config, &self.openai_client).await
to_text_content(
payload,
&self.db,
&self.config,
&self.openai_client,
&self.storage,
)
.await
}
async fn retrieve_similar_entities(

View File

@@ -1,63 +1,200 @@
use anyhow::anyhow;
use common::{
error::AppError,
storage::{db::SurrealDbClient, store, types::file_info::FileInfo},
storage::{db::SurrealDbClient, store::StorageManager, types::file_info::FileInfo},
utils::config::AppConfig,
};
use std::{
env,
io::{Error as IoError, ErrorKind},
path::{Path, PathBuf},
};
use uuid::Uuid;
use super::{
audio_transcription::transcribe_audio_file, image_parsing::extract_text_from_image,
pdf_ingestion::extract_pdf_content,
};
struct TempPathGuard {
path: PathBuf,
}
impl TempPathGuard {
fn as_path(&self) -> &Path {
&self.path
}
}
impl Drop for TempPathGuard {
fn drop(&mut self) {
let _ = std::fs::remove_file(&self.path);
}
}
async fn materialize_temp_file(
bytes: &[u8],
extension: Option<&str>,
) -> Result<TempPathGuard, AppError> {
let mut path = env::temp_dir();
let mut file_name = format!("minne-ingest-{}", Uuid::new_v4());
if let Some(ext) = extension {
if !ext.is_empty() {
file_name.push('.');
file_name.push_str(ext);
}
}
path.push(file_name);
tokio::fs::write(&path, bytes).await?;
Ok(TempPathGuard { path })
}
async fn resolve_existing_local_path(storage: &StorageManager, location: &str) -> Option<PathBuf> {
let path = storage.resolve_local_path(location)?;
match tokio::fs::metadata(&path).await {
Ok(_) => Some(path),
Err(_) => None,
}
}
fn infer_extension(file_info: &FileInfo) -> Option<String> {
Path::new(&file_info.path)
.extension()
.and_then(|ext| ext.to_str())
.map(|ext| ext.to_string())
}
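Taken together these helpers express the fallback strategy: on a local backend, PDF and audio extraction reuse the on-disk file directly; on non-local backends (memory today, the S3 groundwork mentioned in the changelog) resolve_local_path is assumed to return None, so the bytes are spilled to a uniquely named temp file that TempPathGuard deletes on drop. A hypothetical illustration:
// With StorageKind::Memory there is no filesystem path to reuse:
// resolve_existing_local_path(&storage, "user/x/doc.pdf").await == None
// so extract_text_from_file writes the object's bytes to a temp file,
// runs the extractor against that path, and lets the guard clean it up.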
pub async fn extract_text_from_file(
file_info: &FileInfo,
db_client: &SurrealDbClient,
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
config: &AppConfig,
storage: &StorageManager,
) -> Result<String, AppError> {
let base_path = store::resolve_base_dir(config);
let absolute_path = base_path.join(&file_info.path);
let file_bytes = storage
.get(&file_info.path)
.await
.map_err(|e| AppError::from(anyhow!(e)))?;
let local_path = resolve_existing_local_path(storage, &file_info.path).await;
match file_info.mime_type.as_str() {
"text/plain" | "text/markdown" | "application/octet-stream" | "text/x-rust" => {
let content = tokio::fs::read_to_string(&absolute_path).await?;
let content = String::from_utf8(file_bytes.to_vec())
.map_err(|err| AppError::Io(IoError::new(ErrorKind::InvalidData, err)))?;
Ok(content)
}
"application/pdf" => {
extract_pdf_content(
&absolute_path,
if let Some(path) = local_path.as_ref() {
return extract_pdf_content(
path,
db_client,
openai_client,
&config.pdf_ingest_mode,
)
.await;
}
let temp_guard = materialize_temp_file(file_bytes.as_ref(), Some("pdf")).await?;
let result = extract_pdf_content(
temp_guard.as_path(),
db_client,
openai_client,
&config.pdf_ingest_mode,
)
.await
.await;
drop(temp_guard);
result
}
"image/png" | "image/jpeg" => {
let path_str = absolute_path
.to_str()
.ok_or_else(|| {
AppError::Processing(format!(
"Encountered a non-UTF8 path while reading image {}",
file_info.id
))
})?
.to_string();
let content = extract_text_from_image(&path_str, db_client, openai_client).await?;
let content =
extract_text_from_image(file_bytes.as_ref(), db_client, openai_client).await?;
Ok(content)
}
"audio/mpeg" | "audio/mp3" | "audio/wav" | "audio/x-wav" | "audio/webm" | "audio/mp4"
| "audio/ogg" | "audio/flac" => {
let path_str = absolute_path
.to_str()
.ok_or_else(|| {
if let Some(path) = local_path.as_ref() {
let path_str = path.to_str().ok_or_else(|| {
AppError::Processing(format!(
"Encountered a non-UTF8 path while reading audio {}",
file_info.id
))
})?
.to_string();
transcribe_audio_file(&path_str, db_client, openai_client).await
})?;
return transcribe_audio_file(path_str, db_client, openai_client).await;
}
let extension = infer_extension(file_info);
let temp_guard =
materialize_temp_file(file_bytes.as_ref(), extension.as_deref()).await?;
let path_str = temp_guard.as_path().to_str().ok_or_else(|| {
AppError::Processing(format!(
"Encountered a non-UTF8 path while reading audio {}",
file_info.id
))
})?;
let result = transcribe_audio_file(path_str, db_client, openai_client).await;
drop(temp_guard);
result
}
_ => Err(AppError::NotFound(file_info.mime_type.clone())),
}
}
#[cfg(test)]
mod tests {
use super::*;
use async_openai::{config::OpenAIConfig, Client};
use bytes::Bytes;
use chrono::Utc;
use common::{
storage::{db::SurrealDbClient, store::StorageManager},
utils::config::{AppConfig, StorageKind},
};
#[tokio::test]
async fn extracts_text_using_memory_storage_backend() {
let mut config = AppConfig::default();
config.storage = StorageKind::Memory;
let storage = StorageManager::new(&config)
.await
.expect("create storage manager");
let location = "user/test/file.txt";
let contents = b"hello from memory storage";
storage
.put(location, Bytes::from(contents.as_slice().to_vec()))
.await
.expect("write object");
let now = Utc::now();
let file_info = FileInfo {
id: "file".into(),
created_at: now,
updated_at: now,
sha256: "sha256".into(),
path: location.to_string(),
file_name: "file.txt".into(),
mime_type: "text/plain".into(),
user_id: "user".into(),
};
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("create surreal memory");
let openai_client = Client::with_config(OpenAIConfig::default());
let text = extract_text_from_file(&file_info, &db, &openai_client, &config, &storage)
.await
.expect("extract text");
assert_eq!(text, String::from_utf8_lossy(contents));
}
}
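The test exercises the new path through the in-memory backend. For orientation, a sketch of how backend selection inside `StorageManager::new` might look, based on the `object_store` imports shown earlier; the `Local` variant name, the exhaustive two-variant match, and the error type are assumptions (the real constructor also carries S3 groundwork per the commit log):

use std::sync::Arc;

use common::utils::config::StorageKind;
use object_store::{local::LocalFileSystem, memory::InMemory, ObjectStore};

// Illustrative dispatch only, assuming StorageKind has exactly these
// two variants here.
fn build_store(
    kind: &StorageKind,
    base_dir: &std::path::Path,
) -> anyhow::Result<Arc<dyn ObjectStore>> {
    Ok(match kind {
        StorageKind::Memory => Arc::new(InMemory::new()),
        StorageKind::Local => Arc::new(LocalFileSystem::new_with_prefix(base_dir)?),
    })
}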

View File

@@ -10,14 +10,13 @@ use common::{
};
pub async fn extract_text_from_image(
-    path: &str,
+    image_bytes: &[u8],
    db: &SurrealDbClient,
    client: &async_openai::Client<async_openai::config::OpenAIConfig>,
) -> Result<String, AppError> {
    let system_settings = SystemSettings::get_current(db).await?;
-    let image_bytes = tokio::fs::read(&path).await?;
-    let base64_image = STANDARD.encode(&image_bytes);
+    let base64_image = STANDARD.encode(image_bytes);
    let image_url = format!("data:image/png;base64,{base64_image}");
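For reference, the encode call above goes through the `Engine` trait of base64 0.22, which must be in scope for `.encode()` to resolve. A self-contained sketch of the data-URL construction this function performs (the hardcoded PNG prefix mirrors the line above):

use base64::{engine::general_purpose::STANDARD, Engine as _};

fn to_data_url(image_bytes: &[u8]) -> String {
    // STANDARD is a general_purpose engine; Engine must be imported for
    // .encode() to be callable.
    let base64_image = STANDARD.encode(image_bytes);
    format!("data:image/png;base64,{base64_image}")
}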

View File

@@ -3,8 +3,7 @@ use axum_typed_multipart::{FieldData, FieldMetadata};
use chrono::Utc;
use common::{
error::AppError,
-    storage::{db::SurrealDbClient, types::file_info::FileInfo},
-    utils::config::AppConfig,
+    storage::{db::SurrealDbClient, store::StorageManager, types::file_info::FileInfo},
};
use dom_smoothie::{Article, Readability, TextMode};
use headless_chrome::Browser;
@@ -19,7 +18,7 @@ pub async fn extract_text_from_url(
url: &str,
db: &SurrealDbClient,
user_id: &str,
-    config: &AppConfig,
+    storage: &StorageManager,
) -> Result<(Article, FileInfo), AppError> {
info!("Fetching URL: {}", url);
let now = Instant::now();
@@ -81,7 +80,7 @@ pub async fn extract_text_from_url(
metadata,
};
-    let file_info = FileInfo::new(field_data, db, user_id, config).await?;
+    let file_info = FileInfo::new_with_storage(field_data, db, user_id, storage).await?;
let config = dom_smoothie::Config {
text_mode: TextMode::Markdown,
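A hypothetical call site for the reworked signature; the function name `ingest_url_example`, the URL, and the log line are illustrative only:

use common::{
    error::AppError,
    storage::{db::SurrealDbClient, store::StorageManager},
};

// Illustrative caller: the fetched page is now persisted through the
// shared StorageManager rather than a path derived from AppConfig.
async fn ingest_url_example(
    db: &SurrealDbClient,
    storage: &StorageManager,
    user_id: &str,
) -> Result<(), AppError> {
    let (_article, file_info) =
        extract_text_from_url("https://example.com", db, user_id, storage).await?;
    tracing::info!("stored fetched page as {}", file_info.path);
    Ok(())
}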

View File

@@ -1,6 +1,6 @@
[package]
name = "main"
version = "0.2.6"
version = "0.2.7"
edition = "2021"
repository = "https://github.com/perstarkse/minne"
license = "AGPL-3.0-or-later"

View File

@@ -1,6 +1,8 @@
use api_router::{api_routes_v1, api_state::ApiState};
use axum::{extract::FromRef, Router};
-use common::{storage::db::SurrealDbClient, utils::config::get_config};
+use common::{
+    storage::db::SurrealDbClient, storage::store::StorageManager, utils::config::get_config,
+};
use composite_retrieval::reranking::RerankerPool;
use html_router::{html_routes, html_state::HtmlState};
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
@@ -46,18 +48,20 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
+    // Create global storage manager
+    let storage = StorageManager::new(&config).await?;
    let html_state = HtmlState::new_with_resources(
        db,
        openai_client,
        session_store,
+        storage.clone(),
        config.clone(),
        reranker_pool.clone(),
-    )?;
+    )
+    .await?;
-    let api_state = ApiState {
-        db: html_state.db.clone(),
-        config: config.clone(),
-    };
+    let api_state = ApiState::new(&config, storage.clone()).await?;
// Create Axum router
let app = Router::new()
@@ -115,6 +119,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
openai_client.clone(),
config.clone(),
reranker_pool.clone(),
+            storage.clone(), // Use the global storage manager
)
.await
.unwrap(),
@@ -147,6 +152,7 @@ struct AppState {
mod tests {
use super::*;
use axum::{body::Body, http::Request, http::StatusCode, Router};
+    use common::storage::store::StorageManager;
use common::utils::config::{AppConfig, PdfIngestMode, StorageKind};
use std::{path::Path, sync::Arc};
use tower::ServiceExt;
@@ -195,18 +201,25 @@ mod tests {
.with_api_base(&config.openai_base_url),
));
+        let storage = StorageManager::new(&config)
+            .await
+            .expect("failed to build storage manager");
        let html_state = HtmlState::new_with_resources(
            db.clone(),
            openai_client,
            session_store,
+            storage.clone(),
            config.clone(),
            None,
        )
+        .await
        .expect("failed to build html state");
        let api_state = ApiState {
-            db: html_state.db.clone(),
+            db: db.clone(),
            config: config.clone(),
+            storage,
        };
let app = Router::new()
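Both states now hold their own handle to one shared manager. The repeated `storage.clone()` calls stay cheap if the manager wraps reference-counted handles; the field layout below is an assumption for illustration, not the actual definition:

use std::path::PathBuf;
use std::sync::Arc;

use object_store::ObjectStore;

// Assumed shape: cloning copies an Arc pointer (plus a small PathBuf),
// not the underlying store, so one manager can fan out to the HTML
// state, the API state, and the ingestion pipeline.
#[derive(Clone)]
pub struct StorageManagerSketch {
    store: Arc<dyn ObjectStore>,
    // Present only for local backends, letting callers short-circuit to
    // an on-disk path (as resolve_existing_local_path does above).
    local_base_dir: Option<PathBuf>,
}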

View File

@@ -2,7 +2,9 @@ use std::sync::Arc;
use api_router::{api_routes_v1, api_state::ApiState};
use axum::{extract::FromRef, Router};
-use common::{storage::db::SurrealDbClient, utils::config::get_config};
+use common::{
+    storage::db::SurrealDbClient, storage::store::StorageManager, utils::config::get_config,
+};
use composite_retrieval::reranking::RerankerPool;
use html_router::{html_routes, html_state::HtmlState};
use tracing::info;
@@ -44,18 +46,20 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
+    // Create global storage manager
+    let storage = StorageManager::new(&config).await?;
    let html_state = HtmlState::new_with_resources(
        db,
        openai_client,
        session_store,
+        storage.clone(),
        config.clone(),
        reranker_pool,
-    )?;
+    )
+    .await?;
-    let api_state = ApiState {
-        db: html_state.db.clone(),
-        config: config.clone(),
-    };
+    let api_state = ApiState::new(&config, storage).await?;
// Create Axum router
let app = Router::new()

View File

@@ -1,6 +1,8 @@
use std::sync::Arc;
-use common::{storage::db::SurrealDbClient, utils::config::get_config};
+use common::{
+    storage::db::SurrealDbClient, storage::store::StorageManager, utils::config::get_config,
+};
use composite_retrieval::reranking::RerankerPool;
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
@@ -35,8 +37,18 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
+    // Create global storage manager
+    let storage = StorageManager::new(&config).await?;
    let ingestion_pipeline = Arc::new(
-        IngestionPipeline::new(db.clone(), openai_client.clone(), config, reranker_pool).await?,
+        IngestionPipeline::new(
+            db.clone(),
+            openai_client.clone(),
+            config,
+            reranker_pool,
+            storage,
+        )
+        .await?,
    );
run_worker_loop(db, ingestion_pipeline).await
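`run_worker_loop` itself is untouched by this diff. Purely as orientation, the generic shape such a polling loop tends to have; the real implementation in `ingestion_pipeline` owns the actual queue semantics and will differ:

use std::time::Duration;

// Illustrative only: poll a "fetch next job" closure, back off when idle.
async fn worker_loop_sketch<F, Fut>(mut next_job: F) -> Result<(), Box<dyn std::error::Error>>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<bool, Box<dyn std::error::Error>>>,
{
    loop {
        // next_job returns Ok(true) when it processed something.
        if !next_job().await? {
            tokio::time::sleep(Duration::from_secs(1)).await;
        }
    }
}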