ingestion-pipeline crated init, begun moving

2026-06-12 09:14:27 +02:00 · 2025-03-06 15:29:13 +01:00
parent ef1478547e
commit 1a641db503
21 changed files with 648 additions and 601 deletions
@@ -1,162 +1 @@
-use std::{sync::Arc, time::Instant};

-use chrono::Utc;
-use text_splitter::TextSplitter;
-use tracing::{debug, info};
-
-use crate::{
-    error::AppError,
-    storage::{
-        db::SurrealDbClient,
-        types::{
-            job::{Job, JobStatus, MAX_ATTEMPTS},
-            knowledge_entity::KnowledgeEntity,
-            knowledge_relationship::KnowledgeRelationship,
-            text_chunk::TextChunk,
-            text_content::TextContent,
-        },
-    },
-    utils::embedding::generate_embedding,
-};
-
-use super::analysis::{
-    ingress_analyser::IngressAnalyzer, types::llm_analysis_result::LLMGraphAnalysisResult,
-};
-
-pub struct ContentProcessor {
-    db: Arc<SurrealDbClient>,
-    openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
-}
-
-impl ContentProcessor {
-    pub async fn new(
-        db: Arc<SurrealDbClient>,
-        openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
-    ) -> Result<Self, AppError> {
-        Ok(Self { db, openai_client })
-    }
-    pub async fn process_job(&self, job: Job) -> Result<(), AppError> {
-        let current_attempts = match job.status {
-            JobStatus::InProgress { attempts, .. } => attempts + 1,
-            _ => 1,
-        };
-
-        // Update status to InProgress with attempt count
-        Job::update_status(
-            &job.id,
-            JobStatus::InProgress {
-                attempts: current_attempts,
-                last_attempt: Utc::now(),
-            },
-            &self.db,
-        )
-        .await?;
-
-        let text_content = job.content.to_text_content(&self.openai_client).await?;
-
-        match self.process(&text_content).await {
-            Ok(_) => {
-                Job::update_status(&job.id, JobStatus::Completed, &self.db).await?;
-                Ok(())
-            }
-            Err(e) => {
-                if current_attempts >= MAX_ATTEMPTS {
-                    Job::update_status(
-                        &job.id,
-                        JobStatus::Error(format!("Max attempts reached: {}", e)),
-                        &self.db,
-                    )
-                    .await?;
-                }
-                Err(AppError::Processing(e.to_string()))
-            }
-        }
-    }
-
-    pub async fn process(&self, content: &TextContent) -> Result<(), AppError> {
-        let now = Instant::now();
-
-        // Perform analyis, this step also includes retrieval
-        let analysis = self.perform_semantic_analysis(content).await?;
-
-        let end = now.elapsed();
-        info!(
-            "{:?} time elapsed during creation of entities and relationships",
-            end
-        );
-
-        // Convert analysis to objects
-        let (entities, relationships) = analysis
-            .to_database_entities(&content.id, &content.user_id, &self.openai_client)
-            .await?;
-
-        // Store everything
-        tokio::try_join!(
-            self.store_graph_entities(entities, relationships),
-            self.store_vector_chunks(content),
-        )?;
-
-        // Store original content
-        self.db.store_item(content.to_owned()).await?;
-
-        self.db.rebuild_indexes().await?;
-        Ok(())
-    }
-
-    async fn perform_semantic_analysis(
-        &self,
-        content: &TextContent,
-    ) -> Result<LLMGraphAnalysisResult, AppError> {
-        let analyser = IngressAnalyzer::new(&self.db, &self.openai_client);
-        analyser
-            .analyze_content(
-                &content.category,
-                &content.instructions,
-                &content.text,
-                &content.user_id,
-            )
-            .await
-    }
-
-    async fn store_graph_entities(
-        &self,
-        entities: Vec<KnowledgeEntity>,
-        relationships: Vec<KnowledgeRelationship>,
-    ) -> Result<(), AppError> {
-        for entity in &entities {
-            debug!("Storing entity: {:?}", entity);
-            self.db.store_item(entity.clone()).await?;
-        }
-
-        for relationship in &relationships {
-            debug!("Storing relationship: {:?}", relationship);
-            relationship.store_relationship(&self.db).await?;
-        }
-
-        info!(
-            "Stored {} entities and {} relationships",
-            entities.len(),
-            relationships.len()
-        );
-        Ok(())
-    }
-
-    async fn store_vector_chunks(&self, content: &TextContent) -> Result<(), AppError> {
-        let splitter = TextSplitter::new(500..2000);
-        let chunks = splitter.chunks(&content.text);
-
-        // Could potentially process chunks in parallel with a bounded concurrent limit
-        for chunk in chunks {
-            let embedding = generate_embedding(&self.openai_client, chunk).await?;
-            let text_chunk = TextChunk::new(
-                content.id.to_string(),
-                chunk.to_string(),
-                embedding,
-                content.user_id.to_string(),
-            );
-            self.db.store_item(text_chunk).await?;
-        }
-
-        Ok(())
-    }
-}
@@ -1,345 +0,0 @@
-use std::{sync::Arc, time::Duration};
-
-use crate::{
-    error::AppError,
-    storage::types::{file_info::FileInfo, text_content::TextContent},
-};
-use async_openai::types::{
-    ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage,
-    CreateChatCompletionRequestArgs,
-};
-use reqwest;
-use scraper::{Html, Selector};
-use serde::{Deserialize, Serialize};
-use std::fmt::Write;
-use tiktoken_rs::{o200k_base, CoreBPE};
-use tracing::info;
-use url::Url;
-
-#[derive(Debug, Serialize, Deserialize, Clone)]
-pub enum IngressObject {
-    Url {
-        url: String,
-        instructions: String,
-        category: String,
-        user_id: String,
-    },
-    Text {
-        text: String,
-        instructions: String,
-        category: String,
-        user_id: String,
-    },
-    File {
-        file_info: FileInfo,
-        instructions: String,
-        category: String,
-        user_id: String,
-    },
-}
-
-impl IngressObject {
-    /// Creates ingress objects from the provided content, instructions, and files.
-    ///
-    /// # Arguments
-    /// * `content` - Optional textual content to be ingressed
-    /// * `instructions` - Instructions for processing the ingress content
-    /// * `category` - Category to classify the ingressed content
-    /// * `files` - Vector of `FileInfo` objects containing information about uploaded files
-    /// * `user_id` - Identifier of the user performing the ingress operation
-    ///
-    /// # Returns
-    /// * `Result<Vec<IngressObject>, AppError>` - On success, returns a vector of ingress objects
-    ///   (one per file/content type). On failure, returns an `AppError`.
-    pub fn create_ingress_objects(
-        content: Option<String>,
-        instructions: String,
-        category: String,
-        files: Vec<FileInfo>,
-        user_id: &str,
-    ) -> Result<Vec<IngressObject>, AppError> {
-        // Initialize list
-        let mut object_list = Vec::new();
-
-        // Create a IngressObject from content if it exists, checking for URL or text
-        if let Some(input_content) = content {
-            match Url::parse(&input_content) {
-                Ok(url) => {
-                    info!("Detected URL: {}", url);
-                    object_list.push(IngressObject::Url {
-                        url: url.to_string(),
-                        instructions: instructions.clone(),
-                        category: category.clone(),
-                        user_id: user_id.into(),
-                    });
-                }
-                Err(_) => {
-                    if input_content.len() > 2 {
-                        info!("Treating input as plain text");
-                        object_list.push(IngressObject::Text {
-                            text: input_content.to_string(),
-                            instructions: instructions.clone(),
-                            category: category.clone(),
-                            user_id: user_id.into(),
-                        });
-                    }
-                }
-            }
-        }
-
-        for file in files {
-            object_list.push(IngressObject::File {
-                file_info: file,
-                instructions: instructions.clone(),
-                category: category.clone(),
-                user_id: user_id.into(),
-            })
-        }
-
-        // If no objects are constructed, we return Err
-        if object_list.is_empty() {
-            return Err(AppError::NotFound(
-                "No valid content or files provided".into(),
-            ));
-        }
-
-        Ok(object_list)
-    }
-    /// Creates a new `TextContent` instance from a `IngressObject`.
-    ///
-    /// # Arguments
-    /// `&self` - A reference to the `IngressObject`.
-    ///
-    /// # Returns
-    /// `TextContent` - An object containing a text representation of the object, could be a scraped URL, parsed PDF, etc.
-    pub async fn to_text_content(
-        &self,
-        openai_client: &Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
-    ) -> Result<TextContent, AppError> {
-        match self {
-            IngressObject::Url {
-                url,
-                instructions,
-                category,
-                user_id,
-            } => {
-                let text = Self::fetch_text_from_url(url, openai_client).await?;
-                Ok(TextContent::new(
-                    text,
-                    instructions.into(),
-                    category.into(),
-                    None,
-                    Some(url.into()),
-                    user_id.into(),
-                ))
-            }
-            IngressObject::Text {
-                text,
-                instructions,
-                category,
-                user_id,
-            } => Ok(TextContent::new(
-                text.into(),
-                instructions.into(),
-                category.into(),
-                None,
-                None,
-                user_id.into(),
-            )),
-            IngressObject::File {
-                file_info,
-                instructions,
-                category,
-                user_id,
-            } => {
-                let text = Self::extract_text_from_file(file_info).await?;
-                Ok(TextContent::new(
-                    text,
-                    instructions.into(),
-                    category.into(),
-                    Some(file_info.to_owned()),
-                    None,
-                    user_id.into(),
-                ))
-            }
-        }
-    }
-
-    /// Get text from url, will return it as a markdown formatted string
-    async fn fetch_text_from_url(
-        url: &str,
-        openai_client: &Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
-    ) -> Result<String, AppError> {
-        // Use a client with timeouts and reuse
-        let client = reqwest::ClientBuilder::new()
-            .timeout(Duration::from_secs(30))
-            .build()?;
-        let response = client.get(url).send().await?.text().await?;
-
-        // Preallocate string with capacity
-        let mut structured_content = String::with_capacity(response.len() / 2);
-
-        let document = Html::parse_document(&response);
-        let main_selectors = Selector::parse(
-            "article, main, .article-content, .post-content, .entry-content, [role='main']",
-        )
-        .unwrap();
-
-        let content_element = document
-            .select(&main_selectors)
-            .next()
-            .or_else(|| document.select(&Selector::parse("body").unwrap()).next())
-            .ok_or(AppError::NotFound("No content found".into()))?;
-
-        // Compile selectors once
-        let heading_selector = Selector::parse("h1, h2, h3").unwrap();
-        let paragraph_selector = Selector::parse("p").unwrap();
-
-        // Process content in one pass
-        for element in content_element.select(&heading_selector) {
-            let _ = writeln!(
-                structured_content,
-                "<heading>{}</heading>",
-                element.text().collect::<String>().trim()
-            );
-        }
-        for element in content_element.select(&paragraph_selector) {
-            let _ = writeln!(
-                structured_content,
-                "<paragraph>{}</paragraph>",
-                element.text().collect::<String>().trim()
-            );
-        }
-
-        let content = structured_content
-            .replace(|c: char| c.is_control(), " ")
-            .replace("  ", " ");
-        Self::process_web_content(content, openai_client).await
-    }
-
-    pub async fn process_web_content(
-        content: String,
-        openai_client: &Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
-    ) -> Result<String, AppError> {
-        const MAX_TOKENS: usize = 122000;
-        const SYSTEM_PROMPT: &str = r#"
-        You are a precise content extractor for web pages. Your task:
-
-        1. Extract ONLY the main article/content from the provided text
-        2. Maintain the original content - do not summarize or modify the core information
-        3. Ignore peripheral content such as:
-            - Navigation elements
-            - Error messages (e.g., "JavaScript required")
-            - Related articles sections
-            - Comments
-            - Social media links
-            - Advertisement text
-
-        FORMAT:
-        - Convert <heading> tags to markdown headings (#, ##, ###)
-        - Convert <paragraph> tags to markdown paragraphs
-        - Preserve quotes and important formatting
-        - Remove duplicate content
-        - Remove any metadata or technical artifacts
-
-        OUTPUT RULES:
-        - Output ONLY the cleaned content in markdown
-        - Do not add any explanations or meta-commentary
-        - Do not add summaries or conclusions
-        - Do not use any XML/HTML tags in the output
-    "#;
-
-        let bpe = o200k_base()?;
-
-        // Process content in chunks if needed
-        let truncated_content = if bpe.encode_with_special_tokens(&content).len() > MAX_TOKENS {
-            Self::truncate_content(&content, MAX_TOKENS, &bpe)?
-        } else {
-            content
-        };
-
-        let request = CreateChatCompletionRequestArgs::default()
-            .model("gpt-4o-mini")
-            .temperature(0.0)
-            .max_tokens(16200u32)
-            .messages([
-                ChatCompletionRequestSystemMessage::from(SYSTEM_PROMPT).into(),
-                ChatCompletionRequestUserMessage::from(truncated_content).into(),
-            ])
-            .build()?;
-
-        let response = openai_client.chat().create(request).await?;
-
-        response
-            .choices
-            .first()
-            .and_then(|choice| choice.message.content.as_ref())
-            .map(|content| content.to_owned())
-            .ok_or(AppError::LLMParsing("No content in response".into()))
-    }
-
-    fn truncate_content(
-        content: &str,
-        max_tokens: usize,
-        tokenizer: &CoreBPE,
-    ) -> Result<String, AppError> {
-        // Pre-allocate with estimated size
-        let mut result = String::with_capacity(content.len() / 2);
-        let mut current_tokens = 0;
-
-        // Process content by paragraph to maintain context
-        for paragraph in content.split("\n\n") {
-            let tokens = tokenizer.encode_with_special_tokens(paragraph).len();
-
-            // Check if adding paragraph exceeds limit
-            if current_tokens + tokens > max_tokens {
-                break;
-            }
-
-            result.push_str(paragraph);
-            result.push_str("\n\n");
-            current_tokens += tokens;
-        }
-
-        // Ensure we return valid content
-        if result.is_empty() {
-            return Err(AppError::Processing("Content exceeds token limit".into()));
-        }
-
-        Ok(result.trim_end().to_string())
-    }
-
-    /// Extracts text from a file based on its MIME type.
-    async fn extract_text_from_file(file_info: &FileInfo) -> Result<String, AppError> {
-        match file_info.mime_type.as_str() {
-            "text/plain" => {
-                // Read the file and return its content
-                let content = tokio::fs::read_to_string(&file_info.path).await?;
-                Ok(content)
-            }
-            "text/markdown" => {
-                // Read the file and return its content
-                let content = tokio::fs::read_to_string(&file_info.path).await?;
-                Ok(content)
-            }
-            "application/pdf" => {
-                // TODO: Implement PDF text extraction using a crate like `pdf-extract` or `lopdf`
-                Err(AppError::NotFound(file_info.mime_type.clone()))
-            }
-            "image/png" | "image/jpeg" => {
-                // TODO: Implement OCR on image using a crate like `tesseract`
-                Err(AppError::NotFound(file_info.mime_type.clone()))
-            }
-            "application/octet-stream" => {
-                let content = tokio::fs::read_to_string(&file_info.path).await?;
-                Ok(content)
-            }
-            "text/x-rust" => {
-                let content = tokio::fs::read_to_string(&file_info.path).await?;
-                Ok(content)
-            }
-            // Handle other MIME types as needed
-            _ => Err(AppError::NotFound(file_info.mime_type.clone())),
-        }
-    }
-}
@@ -1,3 +1,2 @@
 pub mod analysis;
 pub mod content_processor;
-pub mod ingress_object;
@@ -1,6 +1,6 @@
 use crate::error::AppError;

-use super::types::{analytics::Analytics, job::Job, system_settings::SystemSettings, StoredObject};
+use super::types::{analytics::Analytics, system_settings::SystemSettings, StoredObject};
 use axum_session::{SessionConfig, SessionError, SessionStore};
 use axum_session_surreal::SessionSurrealPool;
 use futures::Stream;
@@ -171,9 +171,9 @@ impl SurrealDbClient {
    /// * `Result<Option<T>, Error>` - The deleted item or Error
    pub async fn listen<T>(
        &self,
-    ) -> Result<impl Stream<Item = Result<Notification<Job>, Error>>, Error>
+    ) -> Result<impl Stream<Item = Result<Notification<T>, Error>>, Error>
    where
-        T: for<'de> StoredObject,
+        T: for<'de> StoredObject + std::marker::Unpin,
    {
        self.client.select(T::table_name()).live().await
    }
@@ -0,0 +1,95 @@
+use crate::{error::AppError, storage::types::file_info::FileInfo};
+use serde::{Deserialize, Serialize};
+use tracing::info;
+use url::Url;
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub enum IngestionPayload {
+    Url {
+        url: String,
+        instructions: String,
+        category: String,
+        user_id: String,
+    },
+    Text {
+        text: String,
+        instructions: String,
+        category: String,
+        user_id: String,
+    },
+    File {
+        file_info: FileInfo,
+        instructions: String,
+        category: String,
+        user_id: String,
+    },
+}
+
+impl IngestionPayload {
+    /// Creates ingestion payloads from the provided content, instructions, and files.
+    ///
+    /// # Arguments
+    /// * `content` - Optional textual content to be ingested
+    /// * `instructions` - Instructions for processing the ingested content
+    /// * `category` - Category to classify the ingested content
+    /// * `files` - Vector of `FileInfo` objects containing information about uploaded files
+    /// * `user_id` - Identifier of the user performing the ingestion
+    ///
+    /// # Returns
+    /// * `Result<Vec<IngestionPayload>, AppError>` - On success, returns a vector of ingestion payloads
+    ///   (one per file/content type). On failure, returns an `AppError`.
+    pub fn create_ingestion_payload(
+        content: Option<String>,
+        instructions: String,
+        category: String,
+        files: Vec<FileInfo>,
+        user_id: &str,
+    ) -> Result<Vec<IngestionPayload>, AppError> {
+        // Initialize list
+        let mut object_list = Vec::new();
+
+        // Create an IngestionPayload from content if it exists, checking for URL or text
+        if let Some(input_content) = content {
+            match Url::parse(&input_content) {
+                Ok(url) => {
+                    info!("Detected URL: {}", url);
+                    object_list.push(IngestionPayload::Url {
+                        url: url.to_string(),
+                        instructions: instructions.clone(),
+                        category: category.clone(),
+                        user_id: user_id.into(),
+                    });
+                }
+                Err(_) => {
+                    if input_content.len() > 2 {
+                        info!("Treating input as plain text");
+                        object_list.push(IngestionPayload::Text {
+                            text: input_content.to_string(),
+                            instructions: instructions.clone(),
+                            category: category.clone(),
+                            user_id: user_id.into(),
+                        });
+                    }
+                }
+            }
+        }
+
+        for file in files {
+            object_list.push(IngestionPayload::File {
+                file_info: file,
+                instructions: instructions.clone(),
+                category: category.clone(),
+                user_id: user_id.into(),
+            })
+        }
+
+        // If no objects are constructed, we return Err
+        if object_list.is_empty() {
+            return Err(AppError::NotFound(
+                "No valid content or files provided".into(),
+            ));
+        }
+
+        Ok(object_list)
+    }
+}
@@ -2,13 +2,12 @@ use futures::Stream;
 use surrealdb::{opt::PatchOp, Notification};
 use uuid::Uuid;

-use crate::{
-    error::AppError, ingress::ingress_object::IngressObject, storage::db::SurrealDbClient,
-    stored_object,
-};
+use crate::{error::AppError, storage::db::SurrealDbClient, stored_object};
+
+use super::ingestion_payload::IngestionPayload;

 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum JobStatus {
+pub enum IngestionTaskStatus {
    Created,
    InProgress {
        attempts: u32,
@@ -19,22 +18,22 @@ pub enum JobStatus {
    Cancelled,
 }

-stored_object!(Job, "job", {
-    content: IngressObject,
-    status: JobStatus,
+stored_object!(IngestionTask, "job", {
+    content: IngestionPayload,
+    status: IngestionTaskStatus,
    user_id: String
 });

 pub const MAX_ATTEMPTS: u32 = 3;

-impl Job {
-    pub async fn new(content: IngressObject, user_id: String) -> Self {
+impl IngestionTask {
+    pub async fn new(content: IngestionPayload, user_id: String) -> Self {
        let now = Utc::now();

        Self {
            id: Uuid::new_v4().to_string(),
            content,
-            status: JobStatus::Created,
+            status: IngestionTaskStatus::Created,
            created_at: now,
            updated_at: now,
            user_id,
@@ -43,7 +42,7 @@ impl Job {

    /// Creates a new job and stores it in the database
    pub async fn create_and_add_to_db(
-        content: IngressObject,
+        content: IngestionPayload,
        user_id: String,
        db: &SurrealDbClient,
    ) -> Result<(), AppError> {
@@ -57,10 +56,10 @@ impl Job {
    // Update job status
    pub async fn update_status(
        id: &str,
-        status: JobStatus,
+        status: IngestionTaskStatus,
        db: &SurrealDbClient,
    ) -> Result<(), AppError> {
-        let _job: Option<Job> = db
+        let _job: Option<Self> = db
            .update((Self::table_name(), id))
            .patch(PatchOp::replace("/status", status))
            .patch(PatchOp::replace(
@@ -73,16 +72,16 @@ impl Job {
    }

    /// Listen for new jobs
-    pub async fn listen_for_jobs(
+    pub async fn listen_for_tasks(
        db: &SurrealDbClient,
-    ) -> Result<impl Stream<Item = Result<Notification<Job>, surrealdb::Error>>, surrealdb::Error>
+    ) -> Result<impl Stream<Item = Result<Notification<Self>, surrealdb::Error>>, surrealdb::Error>
    {
-        db.listen::<Job>().await
+        db.listen::<Self>().await
    }

-    /// Get all unfinished jobs, ie newly created and in progress up two times
-    pub async fn get_unfinished_jobs(db: &SurrealDbClient) -> Result<Vec<Job>, AppError> {
-        let jobs: Vec<Job> = db
+    /// Get all unfinished tasks, i.e. newly created and in progress up to two times
+    pub async fn get_unfinished_tasks(db: &SurrealDbClient) -> Result<Vec<Self>, AppError> {
+        let jobs: Vec<Self> = db
            .query(
                "SELECT * FROM type::table($table) 
             WHERE 
@@ -3,7 +3,8 @@ use serde::{Deserialize, Serialize};
 pub mod analytics;
 pub mod conversation;
 pub mod file_info;
-pub mod job;
+pub mod ingestion_payload;
+pub mod ingestion_task;
 pub mod knowledge_entity;
 pub mod knowledge_relationship;
 pub mod message;
@@ -4,7 +4,7 @@ use surrealdb::{engine::any::Any, Surreal};
 use uuid::Uuid;

 use super::{
-    conversation::Conversation, job::Job, knowledge_entity::KnowledgeEntity,
+    conversation::Conversation, ingestion_task::IngestionTask, knowledge_entity::KnowledgeEntity,
    knowledge_relationship::KnowledgeRelationship, system_settings::SystemSettings,
    text_content::TextContent,
 };
@@ -351,12 +351,12 @@ impl User {
        Ok(conversations)
    }

-    /// Gets all active jobs for the specified user
-    pub async fn get_unfinished_jobs(
+    /// Gets all active ingestion tasks for the specified user
+    pub async fn get_unfinished_ingestion_tasks(
        user_id: &str,
        db: &SurrealDbClient,
-    ) -> Result<Vec<Job>, AppError> {
-        let jobs: Vec<Job> = db
+    ) -> Result<Vec<IngestionTask>, AppError> {
+        let jobs: Vec<IngestionTask> = db
            .query(
                "SELECT * FROM type::table($table) 
             WHERE user_id = $user_id 
@@ -369,7 +369,7 @@ impl User {
             )
             ORDER BY created_at DESC",
            )
-            .bind(("table", Job::table_name()))
+            .bind(("table", IngestionTask::table_name()))
            .bind(("user_id", user_id.to_owned()))
            .bind(("max_attempts", 3))
            .await?
@@ -384,12 +384,12 @@ impl User {
        user_id: &str,
        db: &SurrealDbClient,
    ) -> Result<(), AppError> {
-        db.get_item::<Job>(id)
+        db.get_item::<IngestionTask>(id)
            .await?
            .filter(|job| job.user_id == user_id)
            .ok_or_else(|| AppError::Auth("Not authorized to delete this job".into()))?;

-        db.delete_item::<Job>(id)
+        db.delete_item::<IngestionTask>(id)
            .await
            .map_err(AppError::Database)?;