mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-30 03:10:45 +02:00
chore: harden common storage bootstrap and slim embedded db assets
Unify embedding config, build providers from system settings, and fail startup when index builds error or time out. Move Surreal assets under common/db so embeds exclude crate source, and read storage via streams.
This commit is contained in:
@@ -30,6 +30,7 @@ retrieval-pipeline = { path = "../retrieval-pipeline" }
|
||||
[dev-dependencies]
|
||||
tower = "0.5"
|
||||
uuid = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
common = { path = "../common", features = ["test-utils"] }
|
||||
|
||||
[[bin]]
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_openai::Client;
|
||||
use common::{
|
||||
storage::{
|
||||
db::SurrealDbClient,
|
||||
store::StorageManager,
|
||||
},
|
||||
utils::{
|
||||
config::{get_config, AppConfig},
|
||||
embedding::EmbeddingProvider,
|
||||
},
|
||||
};
|
||||
use retrieval_pipeline::reranking::RerankerPool;
|
||||
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
|
||||
|
||||
/// Bundle of long-lived services produced by [`init`] / [`init_with_config`].
pub struct SharedServices {
    /// Shared handle to the SurrealDB client.
    pub db: Arc<SurrealDbClient>,
    /// Shared OpenAI client, built from the API key and base URL in `config`.
    pub openai_client: Arc<Client<async_openai::config::OpenAIConfig>>,
    /// Shared embedding provider.
    pub embedding_provider: Arc<EmbeddingProvider>,
    /// Storage manager created from `config`.
    pub storage: StorageManager,
    /// Reranker pool; `None` when `RerankerPool::maybe_from_config` yields none.
    pub reranker_pool: Option<Arc<RerankerPool>>,
    /// The application configuration the services were built from.
    pub config: AppConfig,
}
|
||||
|
||||
pub async fn init() -> anyhow::Result<SharedServices> {
|
||||
tracing_subscriber::registry()
|
||||
.with(fmt::layer().with_writer(std::io::stderr))
|
||||
.with(EnvFilter::from_default_env())
|
||||
.try_init()
|
||||
.ok();
|
||||
|
||||
let config = get_config()?;
|
||||
init_with_config(config).await
|
||||
}
|
||||
|
||||
pub(crate) async fn init_with_config(config: AppConfig) -> anyhow::Result<SharedServices> {
|
||||
let db = Arc::new(
|
||||
SurrealDbClient::new(
|
||||
&config.surrealdb_address,
|
||||
&config.surrealdb_username,
|
||||
&config.surrealdb_password,
|
||||
&config.surrealdb_namespace,
|
||||
&config.surrealdb_database,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
|
||||
db.apply_migrations().await?;
|
||||
|
||||
let openai_client = Arc::new(Client::with_config(
|
||||
async_openai::config::OpenAIConfig::new()
|
||||
.with_api_key(&config.openai_api_key)
|
||||
.with_api_base(&config.openai_base_url),
|
||||
));
|
||||
|
||||
let embedding_provider = Arc::new(
|
||||
EmbeddingProvider::from_config(&config, Some(Arc::clone(&openai_client))).await?,
|
||||
);
|
||||
|
||||
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
|
||||
|
||||
let storage = StorageManager::new(&config).await?;
|
||||
|
||||
Ok(SharedServices {
|
||||
db,
|
||||
openai_client,
|
||||
embedding_provider,
|
||||
storage,
|
||||
reranker_pool,
|
||||
config,
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,136 @@
|
||||
mod startup;
|
||||
pub mod wiring;
|
||||
|
||||
pub use startup::prepare_embedding_runtime;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use async_openai::Client;
|
||||
use common::{
|
||||
storage::{
|
||||
db::SurrealDbClient,
|
||||
store::StorageManager,
|
||||
types::system_settings::SystemSettings,
|
||||
},
|
||||
utils::{
|
||||
config::{get_config, AppConfig},
|
||||
embedding::EmbeddingProvider,
|
||||
},
|
||||
};
|
||||
use retrieval_pipeline::reranking::RerankerPool;
|
||||
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
|
||||
|
||||
/// Bundle of long-lived services produced by [`init`] / [`init_with_config`].
pub struct SharedServices {
    /// Shared handle to the SurrealDB client.
    pub db: Arc<SurrealDbClient>,
    /// Shared OpenAI client, built from the API key and base URL in `config`.
    pub openai_client: Arc<Client<async_openai::config::OpenAIConfig>>,
    /// Shared embedding provider, built from the stored system settings and `config`.
    pub embedding_provider: Arc<EmbeddingProvider>,
    /// Storage manager created from `config`.
    pub storage: StorageManager,
    /// Reranker pool; `None` when `RerankerPool::maybe_from_config` yields none.
    pub reranker_pool: Option<Arc<RerankerPool>>,
    /// The application configuration the services were built from.
    pub config: AppConfig,
}
|
||||
|
||||
pub async fn init() -> anyhow::Result<SharedServices> {
|
||||
tracing_subscriber::registry()
|
||||
.with(fmt::layer().with_writer(std::io::stderr))
|
||||
.with(EnvFilter::from_default_env())
|
||||
.try_init()
|
||||
.ok();
|
||||
|
||||
let config = get_config()?;
|
||||
init_with_config(config).await
|
||||
}
|
||||
|
||||
pub(crate) async fn init_with_config(config: AppConfig) -> anyhow::Result<SharedServices> {
|
||||
let db = Arc::new(
|
||||
SurrealDbClient::new(
|
||||
&config.surrealdb_address,
|
||||
&config.surrealdb_username,
|
||||
&config.surrealdb_password,
|
||||
&config.surrealdb_namespace,
|
||||
&config.surrealdb_database,
|
||||
)
|
||||
.await
|
||||
.context("connect to surrealdb")?,
|
||||
);
|
||||
|
||||
db.apply_migrations()
|
||||
.await
|
||||
.context("apply database migrations")?;
|
||||
|
||||
let settings = SystemSettings::get_current(&db)
|
||||
.await
|
||||
.context("load system settings")?;
|
||||
|
||||
let openai_client = Arc::new(Client::with_config(
|
||||
async_openai::config::OpenAIConfig::new()
|
||||
.with_api_key(&config.openai_api_key)
|
||||
.with_api_base(&config.openai_base_url),
|
||||
));
|
||||
|
||||
let embedding_provider = Arc::new(
|
||||
EmbeddingProvider::from_system_settings(
|
||||
&settings,
|
||||
&config,
|
||||
Some(Arc::clone(&openai_client)),
|
||||
)
|
||||
.await
|
||||
.context("initialize embedding provider")?,
|
||||
);
|
||||
|
||||
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
|
||||
|
||||
let storage = StorageManager::new(&config)
|
||||
.await
|
||||
.context("initialize storage manager")?;
|
||||
|
||||
Ok(SharedServices {
|
||||
db,
|
||||
openai_client,
|
||||
embedding_provider,
|
||||
storage,
|
||||
reranker_pool,
|
||||
config,
|
||||
})
|
||||
}
|
||||
|
||||
/// Smoke-test helpers shared by the binaries' test modules.
#[cfg(test)]
pub(crate) mod tests {
    use std::path::Path;

    use anyhow::Context;
    use common::utils::config::{AppConfig, EmbeddingBackend, PdfIngestMode, StorageKind};
    use uuid::Uuid;

    /// Returns a config for smoke tests: in-memory SurrealDB (`mem://`), local
    /// storage rooted at `data_dir`, the hashed embedding backend and port 0.
    /// Fields not listed here come from `AppConfig::default()`.
    pub fn smoke_test_config(namespace: &str, database: &str, data_dir: &Path) -> AppConfig {
        AppConfig {
            openai_api_key: "test-key".into(),
            surrealdb_address: "mem://".into(),
            surrealdb_username: "root".into(),
            surrealdb_password: "root".into(),
            surrealdb_namespace: namespace.into(),
            surrealdb_database: database.into(),
            data_dir: data_dir.to_string_lossy().into_owned(),
            http_port: 0,
            openai_base_url: "https://example.com".into(),
            storage: StorageKind::Local,
            pdf_ingest_mode: PdfIngestMode::LlmFirst,
            embedding_backend: EmbeddingBackend::Hashed,
            ..Default::default()
        }
    }

    /// Creates a unique temp data directory and database name, then boots the
    /// shared services against them.
    ///
    /// On success the caller receives the services and the data directory path
    /// and is responsible for removing that directory. On failure the directory
    /// is removed here (best effort), since the caller never learns its path.
    pub async fn init_smoke_services() -> anyhow::Result<(super::SharedServices, std::path::PathBuf)>
    {
        let namespace = "test_ns";
        let database = format!("test_db_{}", Uuid::new_v4());
        let data_dir = std::env::temp_dir().join(format!("minne_smoke_{}", Uuid::new_v4()));
        tokio::fs::create_dir_all(&data_dir)
            .await
            .context("create temp data directory")?;

        let config = smoke_test_config(namespace, &database, &data_dir);
        match super::init_with_config(config).await {
            Ok(services) => Ok((services, data_dir)),
            Err(err) => {
                // Do not leak the temp directory when bootstrap fails; the
                // cleanup result is ignored so the original error is reported.
                tokio::fs::remove_dir_all(&data_dir).await.ok();
                Err(err)
            }
        }
    }
}
|
||||
@@ -0,0 +1,66 @@
|
||||
use anyhow::Context;
|
||||
use common::{
|
||||
storage::{
|
||||
db::SurrealDbClient,
|
||||
indexes::ensure_runtime,
|
||||
types::{
|
||||
knowledge_entity::KnowledgeEntity, system_settings::SystemSettings,
|
||||
text_chunk::TextChunk,
|
||||
},
|
||||
},
|
||||
utils::embedding::EmbeddingProvider,
|
||||
};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use super::SharedServices;
|
||||
|
||||
/// Syncs embedding settings, re-embeds stored vectors when dimensions change, and
|
||||
/// ensures runtime indexes match the active embedding dimension.
|
||||
pub async fn prepare_embedding_runtime(services: &SharedServices) -> anyhow::Result<SystemSettings> {
|
||||
let (settings, dimensions_changed) =
|
||||
SystemSettings::sync_from_embedding_provider(&services.db, &services.embedding_provider)
|
||||
.await
|
||||
.context("sync system settings from embedding provider")?;
|
||||
|
||||
if dimensions_changed {
|
||||
re_embed_all(
|
||||
&services.db,
|
||||
&services.embedding_provider,
|
||||
settings.embedding_dimensions,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
ensure_runtime(
|
||||
&services.db,
|
||||
settings.embedding_dimensions as usize,
|
||||
)
|
||||
.await
|
||||
.context("ensure runtime indexes")?;
|
||||
|
||||
Ok(settings)
|
||||
}
|
||||
|
||||
async fn re_embed_all(
|
||||
db: &SurrealDbClient,
|
||||
embedding_provider: &EmbeddingProvider,
|
||||
embedding_dimensions: u32,
|
||||
) -> anyhow::Result<()> {
|
||||
warn!(
|
||||
embedding_dimensions,
|
||||
"Embedding configuration changed; re-embedding existing data"
|
||||
);
|
||||
|
||||
info!("Re-embedding TextChunks");
|
||||
TextChunk::update_all_embeddings_with_provider(db, embedding_provider)
|
||||
.await
|
||||
.context("re-embed text chunks after embedding dimension change")?;
|
||||
|
||||
info!("Re-embedding KnowledgeEntities");
|
||||
KnowledgeEntity::update_all_embeddings_with_provider(db, embedding_provider)
|
||||
.await
|
||||
.context("re-embed knowledge entities after embedding dimension change")?;
|
||||
|
||||
info!("Re-embedding complete");
|
||||
Ok(())
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use api_router::{api_routes_v1, api_state::ApiState};
|
||||
use axum::{extract::FromRef, Router};
|
||||
use html_router::{
|
||||
html_routes,
|
||||
html_state::{HtmlState, StateResources},
|
||||
};
|
||||
|
||||
use super::SharedServices;
|
||||
|
||||
/// Builds the Minne API and HTML route subtrees without fixing the outer Axum state
|
||||
/// type. SaaS consumers can merge additional routers and attach their own `AppState`
|
||||
/// as long as it implements `FromRef` for `ApiState` and `HtmlState`.
|
||||
pub fn minne_routes<S>(api_state: &ApiState, html_state: &HtmlState) -> Router<S>
|
||||
where
|
||||
S: Clone + Send + Sync + 'static,
|
||||
ApiState: FromRef<S>,
|
||||
HtmlState: FromRef<S>,
|
||||
{
|
||||
Router::new()
|
||||
.nest("/api/v1", api_routes_v1(api_state))
|
||||
.merge(html_routes(html_state))
|
||||
}
|
||||
|
||||
pub fn build_api_state(services: &SharedServices) -> ApiState {
|
||||
ApiState {
|
||||
db: Arc::clone(&services.db),
|
||||
config: services.config.clone(),
|
||||
storage: services.storage.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn build_html_state(services: &SharedServices) -> anyhow::Result<HtmlState> {
|
||||
let session_store = Arc::new(
|
||||
services
|
||||
.db
|
||||
.create_session_store()
|
||||
.await
|
||||
.context("create session store")?,
|
||||
);
|
||||
|
||||
Ok(HtmlState::new_with_resources(StateResources {
|
||||
db: Arc::clone(&services.db),
|
||||
openai_client: Arc::clone(&services.openai_client),
|
||||
session_store,
|
||||
storage: services.storage.clone(),
|
||||
config: services.config.clone(),
|
||||
reranker_pool: services.reranker_pool.clone(),
|
||||
embedding_provider: Arc::clone(&services.embedding_provider),
|
||||
template_engine: None,
|
||||
}))
|
||||
}
|
||||
+55
-178
@@ -2,50 +2,17 @@ mod bootstrap;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api_router::{api_routes_v1, api_state::ApiState};
|
||||
use axum::{extract::FromRef, Router};
|
||||
use common::{
|
||||
storage::{
|
||||
indexes::ensure_runtime,
|
||||
types::{
|
||||
knowledge_entity::KnowledgeEntity, system_settings::SystemSettings,
|
||||
text_chunk::TextChunk,
|
||||
},
|
||||
},
|
||||
};
|
||||
use html_router::{
|
||||
html_routes,
|
||||
html_state::{HtmlState, StateResources},
|
||||
use axum::extract::FromRef;
|
||||
use bootstrap::{
|
||||
init, prepare_embedding_runtime,
|
||||
wiring::{build_api_state, build_html_state, minne_routes},
|
||||
};
|
||||
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
|
||||
use tracing::{error, info, warn};
|
||||
use tokio::task::LocalSet;
|
||||
|
||||
fn spawn_server_thread(
|
||||
listener: tokio::net::TcpListener,
|
||||
app: Router,
|
||||
) -> std::thread::JoinHandle<()> {
|
||||
std::thread::spawn(move || {
|
||||
let rt = match tokio::runtime::Runtime::new() {
|
||||
Ok(rt) => rt,
|
||||
Err(e) => {
|
||||
error!("Failed to create server runtime: {e}");
|
||||
return;
|
||||
}
|
||||
};
|
||||
rt.block_on(async {
|
||||
if let Err(e) = axum::serve(listener, app).await {
|
||||
error!("Server error: {}", e);
|
||||
}
|
||||
});
|
||||
})
|
||||
}
|
||||
use tracing::info;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let services = bootstrap::init().await?;
|
||||
|
||||
let session_store = Arc::new(services.db.create_session_store().await?);
|
||||
let services = init().await?;
|
||||
|
||||
info!(
|
||||
embedding_backend = ?services.config.embedding_backend,
|
||||
@@ -53,64 +20,16 @@ async fn main() -> anyhow::Result<()> {
|
||||
"Embedding provider initialized"
|
||||
);
|
||||
|
||||
let (settings, dimensions_changed) =
|
||||
SystemSettings::sync_from_embedding_provider(&services.db, &services.embedding_provider)
|
||||
.await?;
|
||||
prepare_embedding_runtime(&services).await?;
|
||||
|
||||
if dimensions_changed {
|
||||
warn!(
|
||||
new_dimensions = settings.embedding_dimensions,
|
||||
"Embedding configuration changed; re-embedding existing data"
|
||||
);
|
||||
let html_state = build_html_state(&services).await?;
|
||||
let api_state = build_api_state(&services);
|
||||
|
||||
info!("Re-embedding TextChunks");
|
||||
if let Err(e) =
|
||||
TextChunk::update_all_embeddings_with_provider(&services.db, &services.embedding_provider)
|
||||
.await
|
||||
{
|
||||
error!(
|
||||
"Failed to re-embed TextChunks: {}. Search results may be stale.",
|
||||
e
|
||||
);
|
||||
}
|
||||
|
||||
info!("Re-embedding KnowledgeEntities");
|
||||
if let Err(e) =
|
||||
KnowledgeEntity::update_all_embeddings_with_provider(&services.db, &services.embedding_provider)
|
||||
.await
|
||||
{
|
||||
error!(
|
||||
"Failed to re-embed KnowledgeEntities: {}. Search results may be stale.",
|
||||
e
|
||||
);
|
||||
}
|
||||
|
||||
info!("Re-embedding complete.");
|
||||
}
|
||||
|
||||
ensure_runtime(&services.db, settings.embedding_dimensions as usize).await?;
|
||||
|
||||
let html_state = HtmlState::new_with_resources(StateResources {
|
||||
db: Arc::clone(&services.db),
|
||||
openai_client: Arc::clone(&services.openai_client),
|
||||
session_store,
|
||||
storage: services.storage.clone(),
|
||||
config: services.config.clone(),
|
||||
reranker_pool: services.reranker_pool.clone(),
|
||||
embedding_provider: Arc::clone(&services.embedding_provider),
|
||||
template_engine: None,
|
||||
let app = minne_routes(&api_state, &html_state).with_state(AppState {
|
||||
api_state,
|
||||
html_state,
|
||||
});
|
||||
|
||||
let api_state = ApiState::new(&services.config, services.storage.clone()).await?;
|
||||
|
||||
let app = Router::new()
|
||||
.nest("/api/v1", api_routes_v1(&api_state))
|
||||
.merge(html_routes(&html_state))
|
||||
.with_state(AppState {
|
||||
api_state,
|
||||
html_state,
|
||||
});
|
||||
|
||||
info!(
|
||||
"Starting server listening on 0.0.0.0:{}",
|
||||
services.config.http_port
|
||||
@@ -118,28 +37,32 @@ async fn main() -> anyhow::Result<()> {
|
||||
let serve_address = format!("0.0.0.0:{}", services.config.http_port);
|
||||
let listener = tokio::net::TcpListener::bind(serve_address).await?;
|
||||
|
||||
let server_handle = spawn_server_thread(listener, app);
|
||||
let worker_db = Arc::clone(&services.db);
|
||||
let worker_openai = Arc::clone(&services.openai_client);
|
||||
let worker_embedding = Arc::clone(&services.embedding_provider);
|
||||
let worker_config = services.config.clone();
|
||||
let worker_reranker = services.reranker_pool.clone();
|
||||
let worker_storage = services.storage.clone();
|
||||
|
||||
let ingestion_pipeline = Arc::new(IngestionPipeline::new(
|
||||
Arc::clone(&services.db),
|
||||
Arc::clone(&services.openai_client),
|
||||
services.config.clone(),
|
||||
services.reranker_pool.clone(),
|
||||
services.storage,
|
||||
Arc::clone(&services.embedding_provider),
|
||||
)?);
|
||||
|
||||
let local = LocalSet::new();
|
||||
local.spawn_local(async move {
|
||||
let server = tokio::spawn(async move { axum::serve(listener, app).await });
|
||||
let worker = tokio::spawn(async move {
|
||||
info!("Starting worker process");
|
||||
if let Err(e) = run_worker_loop(services.db, ingestion_pipeline).await {
|
||||
error!("Worker error: {}", e);
|
||||
}
|
||||
});
|
||||
local.await;
|
||||
|
||||
if let Err(e) = server_handle.join() {
|
||||
error!("Server thread panicked: {:?}", e);
|
||||
let ingestion_pipeline = Arc::new(IngestionPipeline::new(
|
||||
Arc::clone(&worker_db),
|
||||
worker_openai,
|
||||
worker_config,
|
||||
worker_reranker,
|
||||
worker_storage,
|
||||
worker_embedding,
|
||||
)?);
|
||||
|
||||
run_worker_loop(worker_db, ingestion_pipeline).await
|
||||
});
|
||||
|
||||
tokio::select! {
|
||||
result = server => result??,
|
||||
result = worker => result??,
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -147,8 +70,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
#[derive(Clone, FromRef)]
|
||||
struct AppState {
|
||||
api_state: ApiState,
|
||||
html_state: HtmlState,
|
||||
api_state: api_router::api_state::ApiState,
|
||||
html_state: html_router::html_state::HtmlState,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -160,79 +83,33 @@ mod tests {
|
||||
response::Response,
|
||||
Router,
|
||||
};
|
||||
use common::storage::{
|
||||
db::SurrealDbClient,
|
||||
store::StorageManager,
|
||||
types::{system_settings::SystemSettings, user::User},
|
||||
use bootstrap::{
|
||||
prepare_embedding_runtime,
|
||||
tests::init_smoke_services,
|
||||
wiring::{build_api_state, build_html_state, minne_routes},
|
||||
};
|
||||
use common::utils::config::{AppConfig, EmbeddingBackend, PdfIngestMode, StorageKind};
|
||||
use std::{path::Path, sync::Arc};
|
||||
use common::storage::types::{system_settings::SystemSettings, user::User};
|
||||
use tower::ServiceExt;
|
||||
use uuid::Uuid;
|
||||
|
||||
fn smoke_test_config(namespace: &str, database: &str, data_dir: &Path) -> AppConfig {
|
||||
AppConfig {
|
||||
openai_api_key: "test-key".into(),
|
||||
surrealdb_address: "mem://".into(),
|
||||
surrealdb_username: "root".into(),
|
||||
surrealdb_password: "root".into(),
|
||||
surrealdb_namespace: namespace.into(),
|
||||
surrealdb_database: database.into(),
|
||||
data_dir: data_dir.to_string_lossy().into_owned(),
|
||||
http_port: 0,
|
||||
openai_base_url: "https://example.com".into(),
|
||||
storage: StorageKind::Local,
|
||||
pdf_ingest_mode: PdfIngestMode::LlmFirst,
|
||||
embedding_backend: EmbeddingBackend::Hashed,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
async fn build_test_app() -> (Router, Arc<SurrealDbClient>, std::path::PathBuf) {
|
||||
let namespace = "test_ns";
|
||||
let database = format!("test_db_{}", Uuid::new_v4());
|
||||
let data_dir = std::env::temp_dir().join(format!("minne_smoke_{}", Uuid::new_v4()));
|
||||
tokio::fs::create_dir_all(&data_dir).await
|
||||
.expect("failed to create temp data directory");
|
||||
|
||||
let config = smoke_test_config(namespace, &database, &data_dir);
|
||||
let services = crate::bootstrap::init_with_config(config.clone())
|
||||
async fn build_test_app() -> (Router, Arc<common::storage::db::SurrealDbClient>, std::path::PathBuf) {
|
||||
let (services, data_dir) = init_smoke_services()
|
||||
.await
|
||||
.expect("failed to init services");
|
||||
|
||||
let session_store = Arc::new(
|
||||
services
|
||||
.db
|
||||
.create_session_store()
|
||||
.await
|
||||
.expect("failed to create session store"),
|
||||
);
|
||||
prepare_embedding_runtime(&services)
|
||||
.await
|
||||
.expect("failed to prepare embedding runtime");
|
||||
|
||||
let html_state = HtmlState::new_with_resources(StateResources {
|
||||
db: Arc::clone(&services.db),
|
||||
openai_client: Arc::clone(&services.openai_client),
|
||||
session_store,
|
||||
storage: services.storage.clone(),
|
||||
config: services.config.clone(),
|
||||
reranker_pool: services.reranker_pool.clone(),
|
||||
embedding_provider: Arc::clone(&services.embedding_provider),
|
||||
template_engine: None,
|
||||
let html_state = build_html_state(&services)
|
||||
.await
|
||||
.expect("failed to build html state");
|
||||
let api_state = build_api_state(&services);
|
||||
|
||||
let app = minne_routes(&api_state, &html_state).with_state(AppState {
|
||||
api_state,
|
||||
html_state,
|
||||
});
|
||||
|
||||
let api_state = ApiState {
|
||||
db: Arc::clone(&services.db),
|
||||
config: services.config.clone(),
|
||||
storage: services.storage,
|
||||
};
|
||||
|
||||
let app = Router::new()
|
||||
.nest("/api/v1", api_routes_v1(&api_state))
|
||||
.merge(html_routes(&html_state))
|
||||
.with_state(AppState {
|
||||
api_state,
|
||||
html_state,
|
||||
});
|
||||
|
||||
(app, services.db, data_dir)
|
||||
}
|
||||
|
||||
|
||||
+13
-35
@@ -1,47 +1,25 @@
|
||||
mod bootstrap;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api_router::{api_routes_v1, api_state::ApiState};
|
||||
use axum::{extract::FromRef, Router};
|
||||
use common::storage::types::system_settings::SystemSettings;
|
||||
use html_router::{
|
||||
html_routes,
|
||||
html_state::{HtmlState, StateResources},
|
||||
use axum::extract::FromRef;
|
||||
use bootstrap::{
|
||||
init, prepare_embedding_runtime,
|
||||
wiring::{build_api_state, build_html_state, minne_routes},
|
||||
};
|
||||
use tracing::info;
|
||||
|
||||
#[tokio::main(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let services = bootstrap::init().await?;
|
||||
let services = init().await?;
|
||||
prepare_embedding_runtime(&services).await?;
|
||||
|
||||
let session_store = Arc::new(services.db.create_session_store().await?);
|
||||
let html_state = build_html_state(&services).await?;
|
||||
let api_state = build_api_state(&services);
|
||||
|
||||
let (_settings, _dimensions_changed) =
|
||||
SystemSettings::sync_from_embedding_provider(&services.db, &services.embedding_provider)
|
||||
.await?;
|
||||
|
||||
let html_state = HtmlState::new_with_resources(StateResources {
|
||||
db: Arc::clone(&services.db),
|
||||
openai_client: Arc::clone(&services.openai_client),
|
||||
session_store,
|
||||
storage: services.storage.clone(),
|
||||
config: services.config.clone(),
|
||||
reranker_pool: services.reranker_pool.clone(),
|
||||
embedding_provider: Arc::clone(&services.embedding_provider),
|
||||
template_engine: None,
|
||||
let app = minne_routes(&api_state, &html_state).with_state(AppState {
|
||||
api_state,
|
||||
html_state,
|
||||
});
|
||||
|
||||
let api_state = ApiState::new(&services.config, services.storage).await?;
|
||||
|
||||
let app = Router::new()
|
||||
.nest("/api/v1", api_routes_v1(&api_state))
|
||||
.merge(html_routes(&html_state))
|
||||
.with_state(AppState {
|
||||
api_state,
|
||||
html_state,
|
||||
});
|
||||
|
||||
info!(
|
||||
"Starting server listening on 0.0.0.0:{}",
|
||||
services.config.http_port
|
||||
@@ -55,6 +33,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
#[derive(Clone, FromRef)]
|
||||
struct AppState {
|
||||
api_state: ApiState,
|
||||
html_state: HtmlState,
|
||||
api_state: api_router::api_state::ApiState,
|
||||
html_state: html_router::html_state::HtmlState,
|
||||
}
|
||||
|
||||
+58
-1
@@ -2,12 +2,14 @@ mod bootstrap;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use bootstrap::{init, prepare_embedding_runtime};
|
||||
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
|
||||
use tracing::info;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let services = bootstrap::init().await?;
|
||||
let services = init().await?;
|
||||
prepare_embedding_runtime(&services).await?;
|
||||
|
||||
info!(
|
||||
embedding_backend = ?services.config.embedding_backend,
|
||||
@@ -25,3 +27,58 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
run_worker_loop(services.db, ingestion_pipeline).await
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;
    use std::time::Duration;

    use chrono::Utc;
    use common::storage::types::ingestion_task::{IngestionTask, DEFAULT_LEASE_SECS};
    use ingestion_pipeline::pipeline::IngestionPipeline;

    use crate::bootstrap::tests::init_smoke_services;

    /// Boots the smoke services, checks that no task can be claimed from the
    /// fresh database, and verifies the worker loop is still running after a
    /// short idle period.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn worker_smoke_initializes_and_claims_idle() -> anyhow::Result<()> {
        let (services, data_dir) = init_smoke_services().await?;
        let db = Arc::clone(&services.db);

        let pipeline = Arc::new(IngestionPipeline::new(
            Arc::clone(&db),
            Arc::clone(&services.openai_client),
            services.config.clone(),
            services.reranker_pool.clone(),
            services.storage,
            Arc::clone(&services.embedding_provider),
        )?);

        let lease = Duration::from_secs(DEFAULT_LEASE_SECS as u64);
        let claimed =
            IngestionTask::claim_next_ready(&db, "worker-smoke", Utc::now(), lease).await?;
        assert!(
            claimed.is_none(),
            "worker smoke test should find no pending tasks"
        );

        let worker = tokio::spawn(async move {
            ingestion_pipeline::run_worker_loop(db, pipeline).await
        });

        // Give the loop a moment to start; it must not exit on its own.
        tokio::time::sleep(Duration::from_millis(250)).await;
        let still_running = !worker.is_finished();
        assert!(still_running, "worker loop should keep running while idle");
        worker.abort();

        // Best-effort cleanup of the temp data directory.
        tokio::fs::remove_dir_all(&data_dir).await.ok();
        Ok(())
    }
}
|
||||
|
||||
Reference in New Issue
Block a user