mirror of https://github.com/perstarkse/minne.git (synced 2026-04-24 09:48:32 +02:00)
ingestion-pipeline crate init, begun moving
@@ -45,6 +45,7 @@ url = { version = "2.5.2", features = ["serde"] }
 uuid = { version = "1.10.0", features = ["v4", "serde"] }
 
 # Reference to api-router
+ingestion-pipeline = { path = "../ingestion-pipeline" }
 api-router = { path = "../api-router" }
 html-router = { path = "../html-router" }
 common = { path = "../common" }
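Reviewer note: the binary changes below lean on just two entry points from the new crate. A minimal sketch of that surface, inferred purely from the call sites in this diff; the field layout, the Arc-wrapped DB handle, and the boxed error type are assumptions, not the crate's actual contents.

// Sketch only: assumed shape of the ingestion-pipeline crate's pipeline module.
use std::sync::Arc;

use common::storage::{db::SurrealDbClient, types::ingestion_task::IngestionTask};

pub struct IngestionPipeline {
    db: Arc<SurrealDbClient>,
    openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
}

impl IngestionPipeline {
    pub async fn new(
        db: Arc<SurrealDbClient>,
        openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
    ) -> Result<Self, Box<dyn std::error::Error>> {
        Ok(Self { db, openai_client })
    }

    pub async fn process_task(&self, task: IngestionTask) -> Result<(), Box<dyn std::error::Error>> {
        // The real processing logic lives in the crate and is not part of this diff.
        let _ = (&self.db, &self.openai_client, task);
        Ok(())
    }
}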
@@ -1,14 +1,14 @@
 use std::sync::Arc;
 
 use common::{
-    ingress::content_processor::ContentProcessor,
     storage::{
         db::SurrealDbClient,
-        types::job::{Job, JobStatus},
+        types::ingestion_task::{IngestionTask, IngestionTaskStatus},
     },
     utils::config::get_config,
 };
 use futures::StreamExt;
+use ingestion_pipeline::pipeline::IngestionPipeline;
 use surrealdb::Action;
 use tracing::{error, info};
 use tracing_subscriber::{fmt, prelude::*, EnvFilter};
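The renamed task type itself is not part of this diff. As a reading aid, here is the shape the worker code below assumes for it, reconstructed from its match arms and calls (status variants, the `attempts` counter, `MAX_ATTEMPTS`); the concrete field types, any extra fields hidden behind the `..` pattern, and the derives are guesses.

// Reconstruction for review only, not the actual contents of common's ingestion_task module.
use serde::{Deserialize, Serialize};

pub const MAX_ATTEMPTS: u32 = 3; // real value unknown; only `attempts < MAX_ATTEMPTS` matters below

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum IngestionTaskStatus {
    Created,
    InProgress { attempts: u32 }, // the worker matches `{ attempts, .. }`, so more fields may exist
    Completed,
    Error(String),
    Cancelled,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IngestionTask {
    pub id: String, // read as `notification.data.id` and looked up via `db.get_item::<IngestionTask>`
    pub status: IngestionTaskStatus,
    // payload fields elided
}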
@@ -37,23 +37,23 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     let openai_client = Arc::new(async_openai::Client::new());
 
-    let content_processor = ContentProcessor::new(db.clone(), openai_client.clone()).await?;
+    let ingestion_pipeline = IngestionPipeline::new(db.clone(), openai_client.clone()).await?;
 
     loop {
-        // First, check for any unfinished jobs
-        let unfinished_jobs = Job::get_unfinished_jobs(&db).await?;
+        // First, check for any unfinished tasks
+        let unfinished_tasks = IngestionTask::get_unfinished_tasks(&db).await?;
 
-        if !unfinished_jobs.is_empty() {
-            info!("Found {} unfinished jobs", unfinished_jobs.len());
+        if !unfinished_tasks.is_empty() {
+            info!("Found {} unfinished jobs", unfinished_tasks.len());
 
-            for job in unfinished_jobs {
-                content_processor.process_job(job).await?;
+            for task in unfinished_tasks {
+                ingestion_pipeline.process_task(task).await?;
             }
         }
 
         // If no unfinished jobs, start listening for new ones
         info!("Listening for new jobs...");
-        let mut job_stream = Job::listen_for_jobs(&db).await?;
+        let mut job_stream = IngestionTask::listen_for_tasks(&db).await?;
 
         while let Some(notification) = job_stream.next().await {
             match notification {
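`IngestionTask::get_unfinished_tasks` and `listen_for_tasks` are also outside this diff. Given the `surrealdb::Action` import and the `notification.action` / `notification.data` fields used in the next hunk, the listener presumably wraps a SurrealDB live query; a rough sketch under that assumption (the table name and the way `SurrealDbClient` exposes its inner connection are guesses):

use futures::Stream;
use surrealdb::{engine::any::Any, Notification, Surreal};

// Assumed free function standing in for the real associated method on IngestionTask.
pub async fn listen_for_tasks(
    db: &Surreal<Any>, // assuming SurrealDbClient hands out (or derefs to) the Surreal handle
) -> surrealdb::Result<impl Stream<Item = surrealdb::Result<Notification<IngestionTask>>>> {
    // LIVE SELECT on the task table: each create/update arrives as a Notification whose
    // `action` and `data` fields are exactly what the worker matches on below.
    db.select::<Vec<IngestionTask>>("ingestion_task").live().await
}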
@@ -62,41 +62,42 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
                     match notification.action {
                         Action::Create => {
-                            if let Err(e) = content_processor.process_job(notification.data).await {
-                                error!("Error processing job: {}", e);
+                            if let Err(e) = ingestion_pipeline.process_task(notification.data).await
+                            {
+                                error!("Error processing task: {}", e);
                             }
                         }
                         Action::Update => {
                             match notification.data.status {
-                                JobStatus::Completed
-                                | JobStatus::Error(_)
-                                | JobStatus::Cancelled => {
+                                IngestionTaskStatus::Completed
+                                | IngestionTaskStatus::Error(_)
+                                | IngestionTaskStatus::Cancelled => {
                                     info!(
-                                        "Skipping already completed/error/cancelled job: {}",
+                                        "Skipping already completed/error/cancelled task: {}",
                                         notification.data.id
                                     );
                                     continue;
                                 }
-                                JobStatus::InProgress { attempts, .. } => {
+                                IngestionTaskStatus::InProgress { attempts, .. } => {
                                     // Only process if this is a retry after an error, not our own update
-                                    if let Ok(Some(current_job)) =
-                                        db.get_item::<Job>(&notification.data.id).await
+                                    if let Ok(Some(current_task)) =
+                                        db.get_item::<IngestionTask>(&notification.data.id).await
                                     {
-                                        match current_job.status {
-                                            JobStatus::Error(_)
+                                        match current_task.status {
+                                            IngestionTaskStatus::Error(_)
                                                 if attempts
-                                                    < common::storage::types::job::MAX_ATTEMPTS =>
+                                                    < common::storage::types::ingestion_task::MAX_ATTEMPTS =>
                                             {
                                                 // This is a retry after an error
                                                 if let Err(e) =
-                                                    content_processor.process_job(current_job).await
+                                                    ingestion_pipeline.process_task(current_task).await
                                                 {
-                                                    error!("Error processing job retry: {}", e);
+                                                    error!("Error processing task retry: {}", e);
                                                 }
                                             }
                                             _ => {
                                                 info!(
-                                                    "Skipping in-progress update for job: {}",
+                                                    "Skipping in-progress update for task: {}",
                                                     notification.data.id
                                                 );
                                                 continue;
@@ -104,12 +105,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
                                         }
                                     }
                                 }
-                                JobStatus::Created => {
+                                IngestionTaskStatus::Created => {
                                     // Shouldn't happen with Update action, but process if it does
                                     if let Err(e) =
-                                        content_processor.process_job(notification.data).await
+                                        ingestion_pipeline.process_task(notification.data).await
                                     {
-                                        error!("Error processing job: {}", e);
+                                        error!("Error processing task: {}", e);
                                     }
                                 }
                             }
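The nested match in the two hunks above boils down to one decision rule for Update notifications. Distilled into a standalone function (the name is mine, and it uses the sketched status type from the earlier note) to make the guard easier to review:

// Process an InProgress update only when it signals a genuine retry: the stored record
// still reads Error and the attempt count leaves room under MAX_ATTEMPTS.
fn should_retry(stored_status: &IngestionTaskStatus, attempts: u32) -> bool {
    matches!(stored_status, IngestionTaskStatus::Error(_)) && attempts < MAX_ATTEMPTS
}

fn main() {
    assert!(should_retry(&IngestionTaskStatus::Error("boom".into()), 1));
    assert!(!should_retry(&IngestionTaskStatus::Completed, 1));
    assert!(!should_retry(&IngestionTaskStatus::Error("boom".into()), MAX_ATTEMPTS));
}

Everything else in the Update arm is a skip: Completed/Error/Cancelled notifications and the worker's own InProgress writes are ignored, and a Created status arriving under Update is processed defensively even though it shouldn't occur.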
@@ -122,7 +123,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         }
 
         // If we reach here, the stream has ended (connection lost?)
-        error!("Job stream ended unexpectedly, reconnecting...");
+        error!("Database stream ended unexpectedly, reconnecting...");
         tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
     }
 }