chore: apply clippy lint fixes to ingestion-pipeline

2026-01-11 20:50:24 +01:00 · 2025-10-16 20:36:39 +02:00
parent ab68bccb80
commit 3c97d8ead5
7 changed files with 28 additions and 35 deletions
--- a/ingestion-pipeline/src/enricher.rs
+++ b/ingestion-pipeline/src/enricher.rs
@@ -25,7 +25,7 @@ pub struct IngestionEnricher {
 }

 impl IngestionEnricher {
-    pub fn new(
+    pub const fn new(
        db_client: Arc<SurrealDbClient>,
        openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
    ) -> Self {
@@ -61,8 +61,7 @@ impl IngestionEnricher {
        user_id: &str,
    ) -> Result<Vec<RetrievedEntity>, AppError> {
        let input_text = format!(
-            "content: {}, category: {}, user_context: {:?}",
-            text, category, context
+            "content: {text}, category: {category}, user_context: {context:?}"
        );

        retrieve_entities(&self.db_client, &self.openai_client, &input_text, user_id).await
@@ -80,8 +79,7 @@ impl IngestionEnricher {
        let entities_json = format_entities_json(similar_entities);

        let user_message = format!(
-            "Category:\n{}\ncontext:\n{:?}\nContent:\n{}\nExisting KnowledgeEntities in database:\n{}",
-            category, context, text, entities_json
+            "Category:\n{category}\ncontext:\n{context:?}\nContent:\n{text}\nExisting KnowledgeEntities in database:\n{entities_json}"
        );

        debug!("Prepared LLM request message: {}", user_message);
@@ -122,7 +120,7 @@ impl IngestionEnricher {
            ))?;

        serde_json::from_str::<LLMEnrichmentResult>(content).map_err(|e| {
-            AppError::LLMParsing(format!("Failed to parse LLM response into analysis: {}", e))
+            AppError::LLMParsing(format!("Failed to parse LLM response into analysis: {e}"))
        })
    }
 }
--- a/ingestion-pipeline/src/pipeline.rs
+++ b/ingestion-pipeline/src/pipeline.rs
@@ -68,7 +68,7 @@ impl IngestionPipeline {
        .await?;

        match self.process(&text_content).await {
-            Ok(_) => {
+            Ok(()) => {
                processing_task.mark_succeeded(&self.db).await?;
                info!(%task_id, attempt, "ingestion task succeeded");
                Ok(())
@@ -173,7 +173,7 @@ impl IngestionPipeline {
        let entity_count = entities.len();
        let relationship_count = relationships.len();

-        const STORE_GRAPH_MUTATION: &str = r#"
+        const STORE_GRAPH_MUTATION: &str = r"
            BEGIN TRANSACTION;
            LET $entities = $entities;
            LET $relationships = $relationships;
@@ -192,7 +192,7 @@ impl IngestionPipeline {
            };

            COMMIT TRANSACTION;
-        "#;
+        ";

        const MAX_ATTEMPTS: usize = 3;
        const INITIAL_BACKOFF_MS: u64 = 50;
--- a/ingestion-pipeline/src/types/llm_enrichment_result.rs
+++ b/ingestion-pipeline/src/types/llm_enrichment_result.rs
@@ -49,7 +49,7 @@ impl LLMEnrichmentResult {
    /// # Arguments
    ///
    /// * `source_id` - A UUID representing the source identifier.
-    /// * `openai_client` - OpenAI client for LLM calls.
+    /// * `openai_client` - `OpenAI` client for LLM calls.
    ///
    /// # Returns
    ///
--- a/ingestion-pipeline/src/utils/audio_transcription.rs
+++ b/ingestion-pipeline/src/utils/audio_transcription.rs
@@ -4,7 +4,7 @@ use common::{
    storage::{db::SurrealDbClient, types::system_settings::SystemSettings},
 };

-/// Transcribes an audio file using the configured OpenAI Whisper model.
+/// Transcribes an audio file using the configured `OpenAI` Whisper model.
 pub async fn transcribe_audio_file(
    file_path: &str,
    db_client: &SurrealDbClient,
@@ -23,6 +23,6 @@ pub async fn transcribe_audio_file(
        .audio()
        .transcribe(request)
        .await
-        .map_err(|e| AppError::Processing(format!("Audio transcription failed: {}", e)))?;
+        .map_err(|e| AppError::Processing(format!("Audio transcription failed: {e}")))?;
    Ok(response.text)
 }
--- a/ingestion-pipeline/src/utils/image_parsing.rs
+++ b/ingestion-pipeline/src/utils/image_parsing.rs
@@ -19,7 +19,7 @@ pub async fn extract_text_from_image(

    let base64_image = STANDARD.encode(&image_bytes);

-    let image_url = format!("data:image/png;base64,{}", base64_image);
+    let image_url = format!("data:image/png;base64,{base64_image}");

    let request = CreateChatCompletionRequestArgs::default()
        .model(system_settings.image_processing_model)
--- a/ingestion-pipeline/src/utils/mod.rs
+++ b/ingestion-pipeline/src/utils/mod.rs
@@ -15,13 +15,13 @@ pub struct GraphMapper {

 impl Default for GraphMapper {
    fn default() -> Self {
-        GraphMapper::new()
+        Self::new()
    }
 }

 impl GraphMapper {
    pub fn new() -> Self {
-        GraphMapper {
+        Self {
            key_to_id: HashMap::new(),
        }
    }
@@ -36,8 +36,7 @@ impl GraphMapper {
        // If parsing fails, look it up in the map.
        self.key_to_id.get(key).copied().ok_or_else(|| {
            AppError::GraphMapper(format!(
-                "Key '{}' is not a valid UUID and was not found in the map.",
-                key
+                "Key '{key}' is not a valid UUID and was not found in the map."
            ))
        })
    }
@@ -54,6 +53,6 @@ impl GraphMapper {
        self.key_to_id
            .get(key)
            .copied()
-            .ok_or_else(|| AppError::GraphMapper(format!("Key '{}' not found in map.", key)))
+            .ok_or_else(|| AppError::GraphMapper(format!("Key '{key}' not found in map.")))
    }
 }
--- a/ingestion-pipeline/src/utils/pdf_ingestion.rs
+++ b/ingestion-pipeline/src/utils/pdf_ingestion.rs
@@ -118,7 +118,7 @@ async fn load_page_numbers(pdf_bytes: Vec<u8>) -> Result<Vec<u32>, AppError> {
 /// Uses the existing headless Chrome dependency to rasterize the requested PDF pages into PNGs.
 async fn render_pdf_pages(file_path: &Path, pages: &[u32]) -> Result<Vec<Vec<u8>>, AppError> {
    let file_url = url::Url::from_file_path(file_path)
-        .map_err(|_| AppError::Processing("Unable to construct PDF file URL".into()))?;
+        .map_err(|()| AppError::Processing("Unable to construct PDF file URL".into()))?;

    let browser = create_browser()?;
    let tab = browser
@@ -133,8 +133,7 @@ async fn render_pdf_pages(file_path: &Path, pages: &[u32]) -> Result<Vec<Vec<u8>

    for (idx, page) in pages.iter().enumerate() {
        let target = format!(
-            "{}#page={}&toolbar=0&statusbar=0&zoom=page-fit",
-            file_url, page
+            "{file_url}#page={page}&toolbar=0&statusbar=0&zoom=page-fit"
        );
        tab.navigate_to(&target)
            .map_err(|err| AppError::Processing(format!("Failed to navigate to PDF page: {err}")))?
@@ -279,7 +278,7 @@ async fn vision_markdown(
    let mut markdown_sections = Vec::with_capacity(rendered_pages.len());

    for (batch_idx, chunk) in rendered_pages.chunks(PAGES_PER_VISION_CHUNK).enumerate() {
-        let total_image_bytes: usize = chunk.iter().map(|bytes| bytes.len()).sum();
+        let total_image_bytes: usize = chunk.iter().map(std::vec::Vec::len).sum();
        debug!(
            batch = batch_idx,
            pages = chunk.len(),
@@ -318,7 +317,7 @@ async fn vision_markdown(
            );

            for encoded in &encoded_images {
-                let image_url = format!("data:image/png;base64,{}", encoded);
+                let image_url = format!("data:image/png;base64,{encoded}");
                content_parts.push(
                    ChatCompletionRequestMessageContentPartImageArgs::default()
                        .image_url(
@@ -413,7 +412,7 @@ fn looks_good_enough(text: &str) -> bool {
        return false;
    }

-    let ascii_chars = text.chars().filter(|c| c.is_ascii()).count() as f64;
+    let ascii_chars = text.chars().filter(char::is_ascii).count() as f64;
    let ascii_ratio = ascii_chars / total_chars;
    if ascii_ratio < FAST_PATH_MIN_ASCII_RATIO {
        return false;
@@ -484,8 +483,7 @@ fn is_structural_line(line: &str) -> bool {
        || lowered
            .chars()
            .next()
-            .map(|c| c.is_ascii_digit())
-            .unwrap_or(false)
+            .is_some_and(|c| c.is_ascii_digit())
            && lowered.contains('.')
 }

@@ -572,14 +570,13 @@ fn prepare_pdf_viewer(tab: &headless_chrome::Tab, page_number: u32) {
                const toolbar = app.shadowRoot.querySelector('#toolbar');
                if (toolbar) {{ toolbar.style.display = 'none'; }}
            }}
-            const page = viewer.shadowRoot.querySelector('viewer-page:nth-of-type({page})');
+            const page = viewer.shadowRoot.querySelector('viewer-page:nth-of-type({page_number})');
            if (page && page.scrollIntoView) {{
                page.scrollIntoView({{ block: 'start', inline: 'center' }});
            }}
-            const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page}"]');
+            const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page_number}"]');
            return !!canvas;
-        }})()"#,
-        page = page_number
+        }})()"#
    );

    match tab.evaluate(&script, false) {
@@ -607,12 +604,11 @@ fn canvas_viewport_for_page(
            if (!embed || !embed.shadowRoot) return null;
            const viewer = embed.shadowRoot.querySelector('pdf-viewer');
            if (!viewer || !viewer.shadowRoot) return null;
-            const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page}"]');
+            const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page_number}"]');
            if (!canvas) return null;
            const rect = canvas.getBoundingClientRect();
            return {{ x: rect.x, y: rect.y, width: rect.width, height: rect.height }};
-        }})()"#,
-        page = page_number
+        }})()"#
    );

    let result = tab
@@ -683,7 +679,7 @@ fn capture_full_page_png(tab: &headless_chrome::Tab) -> Result<Vec<u8>, AppError
    })
 }

-fn is_suspicious_image(len: usize) -> bool {
+const fn is_suspicious_image(len: usize) -> bool {
    len < MIN_PAGE_IMAGE_BYTES
 }

@@ -710,7 +706,7 @@ fn is_low_quality_response(content: &str) -> bool {
    lowered.contains("unable to") || lowered.contains("cannot")
 }

-fn prompt_for_attempt(attempt: usize, base_prompt: &str) -> &str {
+const fn prompt_for_attempt(attempt: usize, base_prompt: &str) -> &str {
    if attempt == 0 {
        base_prompt
    } else {