mirror of
https://github.com/perstarkse/minne.git
synced 2026-06-28 12:56:26 +02:00
chore: harden common storage bootstrap and slim embedded db assets
Unify embedding config, build providers from system settings, and fail startup when index builds error or time out. Move Surreal assets under common/db so embeds exclude crate source, and read storage via streams.
This commit is contained in:
+58
-1
@@ -2,12 +2,14 @@ mod bootstrap;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use bootstrap::{init, prepare_embedding_runtime};
|
||||
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
|
||||
use tracing::info;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let services = bootstrap::init().await?;
|
||||
let services = init().await?;
|
||||
prepare_embedding_runtime(&services).await?;
|
||||
|
||||
info!(
|
||||
embedding_backend = ?services.config.embedding_backend,
|
||||
@@ -25,3 +27,58 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
run_worker_loop(services.db, ingestion_pipeline).await
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;
    use std::time::Duration;

    use chrono::Utc;
    use common::storage::types::ingestion_task::{IngestionTask, DEFAULT_LEASE_SECS};
    use ingestion_pipeline::pipeline::IngestionPipeline;

    use crate::bootstrap::tests::init_smoke_services;

    /// Smoke test for the worker binary's wiring.
    ///
    /// Steps, in order:
    /// 1. Build the smoke-test services via `init_smoke_services`.
    /// 2. Construct an `IngestionPipeline` from those services.
    /// 3. Try to claim a ready ingestion task and assert that none is returned.
    /// 4. Spawn `run_worker_loop` and assert it has not finished after 250 ms.
    /// 5. Abort the worker and remove the data directory.
    ///
    /// # Errors
    ///
    /// Propagates any error from service initialisation, pipeline
    /// construction, or the task-claim query.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn worker_smoke_initializes_and_claims_idle() -> anyhow::Result<()> {
        let (services, data_dir) = init_smoke_services().await?;

        // Built up front and wrapped in its `Arc` right away; it is only handed
        // to the worker loop further down.
        let shared_pipeline = Arc::new(IngestionPipeline::new(
            Arc::clone(&services.db),
            Arc::clone(&services.openai_client),
            services.config.clone(),
            services.reranker_pool.clone(),
            services.storage,
            Arc::clone(&services.embedding_provider),
        )?);

        let lease = Duration::from_secs(DEFAULT_LEASE_SECS as u64);
        let maybe_task =
            IngestionTask::claim_next_ready(&services.db, "worker-smoke", Utc::now(), lease)
                .await?;
        assert!(
            maybe_task.is_none(),
            "worker smoke test should find no pending tasks"
        );

        let worker_db = Arc::clone(&services.db);
        let handle = tokio::spawn(async move {
            ingestion_pipeline::run_worker_loop(worker_db, shared_pipeline).await
        });

        // Give the loop a short window in which it could (incorrectly) exit.
        tokio::time::sleep(Duration::from_millis(250)).await;
        assert!(
            !handle.is_finished(),
            "worker loop should keep running while idle"
        );
        handle.abort();

        // Best-effort cleanup: a failure to remove the directory is ignored.
        tokio::fs::remove_dir_all(&data_dir).await.ok();
        Ok(())
    }
}
|
||||
|
||||
Reference in New Issue
Block a user