mirror of
https://github.com/perstarkse/minne.git
synced 2026-06-30 18:11:34 +02:00
4cd428185f
Ingest relies on SurrealDB incremental index maintenance; the worker runs native REBUILD INDEX on a configurable interval with lease state on system_settings.
70 lines
2.4 KiB
Rust
70 lines
2.4 KiB
Rust
#![allow(clippy::missing_docs_in_private_items, clippy::result_large_err)]
|
|
|
|
pub mod pipeline;
|
|
pub mod utils;
|
|
|
|
use chrono::Utc;
|
|
use common::storage::{
|
|
db::SurrealDbClient,
|
|
indexes::maybe_run_scheduled_index_rebuild,
|
|
types::ingestion_task::{IngestionTask, DEFAULT_LEASE_SECS},
|
|
};
|
|
pub use pipeline::{
|
|
EmbeddedKnowledgeEntity, EmbeddedTextChunk, IngestionConfig, IngestionPipeline,
|
|
IngestionTuning, PipelineArtifacts,
|
|
};
|
|
use std::sync::Arc;
|
|
use tokio::time::{sleep, Duration};
|
|
use tracing::{error, info, warn};
|
|
use uuid::Uuid;
|
|
|
|
/// Idle backoff, in milliseconds: how long the worker loop sleeps after a claim
/// attempt finds no task ready, before it polls again.
|
|
const WORKER_IDLE_BACKOFF_MS: u64 = 500;
|
|
/// Error backoff, in milliseconds: how long the worker loop sleeps after a claim
/// attempt returns an error, before it retries.
|
|
const WORKER_CLAIM_ERROR_BACKOFF_MS: u64 = 1_000;
|
|
|
|
pub async fn run_worker_loop(
|
|
db: Arc<SurrealDbClient>,
|
|
ingestion_pipeline: Arc<IngestionPipeline>,
|
|
index_rebuild_interval_secs: u64,
|
|
) -> anyhow::Result<()> {
|
|
let worker_id = format!("ingestion-worker-{}", Uuid::new_v4());
|
|
let lease_duration = Duration::from_secs(DEFAULT_LEASE_SECS as u64);
|
|
let idle_backoff = Duration::from_millis(WORKER_IDLE_BACKOFF_MS);
|
|
let claim_error_backoff = Duration::from_millis(WORKER_CLAIM_ERROR_BACKOFF_MS);
|
|
|
|
loop {
|
|
match IngestionTask::claim_next_ready(&db, &worker_id, Utc::now(), lease_duration).await {
|
|
Ok(Some(task)) => {
|
|
let task_id = task.id.clone();
|
|
info!(
|
|
%worker_id,
|
|
%task_id,
|
|
attempt = task.attempts,
|
|
"claimed ingestion task"
|
|
);
|
|
if let Err(err) = ingestion_pipeline.process_task(task).await {
|
|
error!(%worker_id, %task_id, error = %err, "ingestion task failed");
|
|
}
|
|
}
|
|
Ok(None) => {
|
|
maybe_run_scheduled_index_rebuild(
|
|
db.as_ref(),
|
|
&worker_id,
|
|
index_rebuild_interval_secs,
|
|
)
|
|
.await;
|
|
sleep(idle_backoff).await;
|
|
}
|
|
Err(err) => {
|
|
error!(%worker_id, error = %err, "failed to claim ingestion task");
|
|
warn!(
|
|
backoff_ms = WORKER_CLAIM_ERROR_BACKOFF_MS,
|
|
"Backing off after claim error"
|
|
);
|
|
sleep(claim_error_backoff).await;
|
|
}
|
|
}
|
|
}
|
|
}
|