mirror of
https://github.com/perstarkse/minne.git
synced 2026-06-16 19:24:23 +02:00
e3bb2935d0
Unify embedding config, build providers from system settings, and fail startup when index builds error or time out. Move Surreal assets under common/db so embeds exclude crate source, and read storage via streams.
85 lines
2.5 KiB
Rust
85 lines
2.5 KiB
Rust
mod bootstrap;
|
|
|
|
use std::sync::Arc;
|
|
|
|
use bootstrap::{init, prepare_embedding_runtime};
|
|
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
|
|
use tracing::info;
|
|
|
|
#[tokio::main]
|
|
async fn main() -> anyhow::Result<()> {
|
|
let services = init().await?;
|
|
prepare_embedding_runtime(&services).await?;
|
|
|
|
info!(
|
|
embedding_backend = ?services.config.embedding_backend,
|
|
"Embedding provider initialized for worker"
|
|
);
|
|
|
|
let ingestion_pipeline = Arc::new(IngestionPipeline::new(
|
|
Arc::clone(&services.db),
|
|
Arc::clone(&services.openai_client),
|
|
services.config.clone(),
|
|
services.reranker_pool.clone(),
|
|
services.storage,
|
|
Arc::clone(&services.embedding_provider),
|
|
)?);
|
|
|
|
run_worker_loop(services.db, ingestion_pipeline).await
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use std::sync::Arc;
|
|
use std::time::Duration;
|
|
|
|
use chrono::Utc;
|
|
use common::storage::types::ingestion_task::{IngestionTask, DEFAULT_LEASE_SECS};
|
|
use ingestion_pipeline::pipeline::IngestionPipeline;
|
|
|
|
use crate::bootstrap::tests::init_smoke_services;
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn worker_smoke_initializes_and_claims_idle() -> anyhow::Result<()> {
|
|
let (services, data_dir) = init_smoke_services().await?;
|
|
|
|
let pipeline = IngestionPipeline::new(
|
|
Arc::clone(&services.db),
|
|
Arc::clone(&services.openai_client),
|
|
services.config.clone(),
|
|
services.reranker_pool.clone(),
|
|
services.storage,
|
|
Arc::clone(&services.embedding_provider),
|
|
)?;
|
|
|
|
let worker_id = "worker-smoke";
|
|
let claimed = IngestionTask::claim_next_ready(
|
|
&services.db,
|
|
worker_id,
|
|
Utc::now(),
|
|
Duration::from_secs(DEFAULT_LEASE_SECS as u64),
|
|
)
|
|
.await?;
|
|
assert!(
|
|
claimed.is_none(),
|
|
"worker smoke test should find no pending tasks"
|
|
);
|
|
|
|
let db = Arc::clone(&services.db);
|
|
let pipeline = Arc::new(pipeline);
|
|
let worker = tokio::spawn(async move {
|
|
ingestion_pipeline::run_worker_loop(db, pipeline).await
|
|
});
|
|
|
|
tokio::time::sleep(Duration::from_millis(250)).await;
|
|
assert!(
|
|
!worker.is_finished(),
|
|
"worker loop should keep running while idle"
|
|
);
|
|
worker.abort();
|
|
|
|
tokio::fs::remove_dir_all(&data_dir).await.ok();
|
|
Ok(())
|
|
}
|
|
}
|