chore: harden common storage bootstrap and slim embedded db assets

Unify embedding config, build providers from system settings, and fail
startup when index builds error or time out. Move Surreal assets under
common/db so embeds exclude crate source, and read storage via streams.
This commit is contained in:
Per Stark
2026-05-29 12:26:26 +02:00
parent 93d11b66eb
commit e3bb2935d0
62 changed files with 672 additions and 443 deletions
+58 -1
View File
@@ -2,12 +2,14 @@ mod bootstrap;
use std::sync::Arc;
use bootstrap::{init, prepare_embedding_runtime};
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
use tracing::info;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let services = bootstrap::init().await?;
let services = init().await?;
prepare_embedding_runtime(&services).await?;
info!(
embedding_backend = ?services.config.embedding_backend,
@@ -25,3 +27,58 @@ async fn main() -> anyhow::Result<()> {
run_worker_loop(services.db, ingestion_pipeline).await
}
#[cfg(test)]
mod tests {
    use std::sync::Arc;
    use std::time::Duration;

    use chrono::Utc;
    use common::storage::types::ingestion_task::{IngestionTask, DEFAULT_LEASE_SECS};
    use ingestion_pipeline::pipeline::IngestionPipeline;

    use crate::bootstrap::tests::init_smoke_services;

    /// Smoke test for the worker binary wiring.
    ///
    /// Builds an [`IngestionPipeline`] from the smoke-test services, checks that
    /// claiming the next ready task yields nothing, then starts the worker loop
    /// and verifies it is still running after a short idle period.
    ///
    /// # Errors
    ///
    /// Propagates failures from service initialization, pipeline construction,
    /// and the task-claim query.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn worker_smoke_initializes_and_claims_idle() -> anyhow::Result<()> {
        let (services, data_dir) = init_smoke_services().await?;

        // `services.storage` is moved into the pipeline; the remaining fields
        // of `services` stay usable below.
        let pipeline = IngestionPipeline::new(
            Arc::clone(&services.db),
            Arc::clone(&services.openai_client),
            services.config.clone(),
            services.reranker_pool.clone(),
            services.storage,
            Arc::clone(&services.embedding_provider),
        )?;

        let worker_id = "worker-smoke";
        let claimed = IngestionTask::claim_next_ready(
            &services.db,
            worker_id,
            Utc::now(),
            Duration::from_secs(DEFAULT_LEASE_SECS as u64),
        )
        .await?;
        assert!(
            claimed.is_none(),
            "worker smoke test should find no pending tasks"
        );

        let db = Arc::clone(&services.db);
        let pipeline = Arc::new(pipeline);
        let worker = tokio::spawn(async move {
            ingestion_pipeline::run_worker_loop(db, pipeline).await
        });

        // Give the loop a moment to run; it is expected to keep going while idle.
        tokio::time::sleep(Duration::from_millis(250)).await;
        assert!(
            !worker.is_finished(),
            "worker loop should keep running while idle"
        );

        // `abort()` only requests cancellation. Awaiting the handle waits until
        // the task has actually stopped and its future (which owns the db handle
        // and the pipeline) has been dropped, so the worker is no longer running
        // when the data directory is removed. The resulting cancellation error
        // is expected and deliberately ignored.
        worker.abort();
        let _ = worker.await;

        // Best-effort cleanup: a failure to remove the directory must not fail
        // the test.
        tokio::fs::remove_dir_all(&data_dir).await.ok();
        Ok(())
    }
}