diff --git a/ingestion-pipeline/src/enricher.rs b/ingestion-pipeline/src/enricher.rs index eab2c5b..b2e2652 100644 --- a/ingestion-pipeline/src/enricher.rs +++ b/ingestion-pipeline/src/enricher.rs @@ -25,7 +25,7 @@ pub struct IngestionEnricher { } impl IngestionEnricher { - pub fn new( + pub const fn new( db_client: Arc, openai_client: Arc>, ) -> Self { @@ -61,8 +61,7 @@ impl IngestionEnricher { user_id: &str, ) -> Result, AppError> { let input_text = format!( - "content: {}, category: {}, user_context: {:?}", - text, category, context + "content: {text}, category: {category}, user_context: {context:?}" ); retrieve_entities(&self.db_client, &self.openai_client, &input_text, user_id).await @@ -80,8 +79,7 @@ impl IngestionEnricher { let entities_json = format_entities_json(similar_entities); let user_message = format!( - "Category:\n{}\ncontext:\n{:?}\nContent:\n{}\nExisting KnowledgeEntities in database:\n{}", - category, context, text, entities_json + "Category:\n{category}\ncontext:\n{context:?}\nContent:\n{text}\nExisting KnowledgeEntities in database:\n{entities_json}" ); debug!("Prepared LLM request message: {}", user_message); @@ -122,7 +120,7 @@ impl IngestionEnricher { ))?; serde_json::from_str::(content).map_err(|e| { - AppError::LLMParsing(format!("Failed to parse LLM response into analysis: {}", e)) + AppError::LLMParsing(format!("Failed to parse LLM response into analysis: {e}")) }) } } diff --git a/ingestion-pipeline/src/pipeline.rs b/ingestion-pipeline/src/pipeline.rs index 58095b4..8c5e9ad 100644 --- a/ingestion-pipeline/src/pipeline.rs +++ b/ingestion-pipeline/src/pipeline.rs @@ -68,7 +68,7 @@ impl IngestionPipeline { .await?; match self.process(&text_content).await { - Ok(_) => { + Ok(()) => { processing_task.mark_succeeded(&self.db).await?; info!(%task_id, attempt, "ingestion task succeeded"); Ok(()) @@ -173,7 +173,7 @@ impl IngestionPipeline { let entity_count = entities.len(); let relationship_count = relationships.len(); - const STORE_GRAPH_MUTATION: &str = r#" + const STORE_GRAPH_MUTATION: &str = r" BEGIN TRANSACTION; LET $entities = $entities; LET $relationships = $relationships; @@ -192,7 +192,7 @@ impl IngestionPipeline { }; COMMIT TRANSACTION; - "#; + "; const MAX_ATTEMPTS: usize = 3; const INITIAL_BACKOFF_MS: u64 = 50; diff --git a/ingestion-pipeline/src/types/llm_enrichment_result.rs b/ingestion-pipeline/src/types/llm_enrichment_result.rs index 5898b8e..94b2fb4 100644 --- a/ingestion-pipeline/src/types/llm_enrichment_result.rs +++ b/ingestion-pipeline/src/types/llm_enrichment_result.rs @@ -49,7 +49,7 @@ impl LLMEnrichmentResult { /// # Arguments /// /// * `source_id` - A UUID representing the source identifier. - /// * `openai_client` - OpenAI client for LLM calls. + /// * `openai_client` - `OpenAI` client for LLM calls. /// /// # Returns /// diff --git a/ingestion-pipeline/src/utils/audio_transcription.rs b/ingestion-pipeline/src/utils/audio_transcription.rs index b1ebbf6..c528b6e 100644 --- a/ingestion-pipeline/src/utils/audio_transcription.rs +++ b/ingestion-pipeline/src/utils/audio_transcription.rs @@ -4,7 +4,7 @@ use common::{ storage::{db::SurrealDbClient, types::system_settings::SystemSettings}, }; -/// Transcribes an audio file using the configured OpenAI Whisper model. +/// Transcribes an audio file using the configured `OpenAI` Whisper model. pub async fn transcribe_audio_file( file_path: &str, db_client: &SurrealDbClient, @@ -23,6 +23,6 @@ pub async fn transcribe_audio_file( .audio() .transcribe(request) .await - .map_err(|e| AppError::Processing(format!("Audio transcription failed: {}", e)))?; + .map_err(|e| AppError::Processing(format!("Audio transcription failed: {e}")))?; Ok(response.text) } diff --git a/ingestion-pipeline/src/utils/image_parsing.rs b/ingestion-pipeline/src/utils/image_parsing.rs index 1b3295e..8dd184d 100644 --- a/ingestion-pipeline/src/utils/image_parsing.rs +++ b/ingestion-pipeline/src/utils/image_parsing.rs @@ -19,7 +19,7 @@ pub async fn extract_text_from_image( let base64_image = STANDARD.encode(&image_bytes); - let image_url = format!("data:image/png;base64,{}", base64_image); + let image_url = format!("data:image/png;base64,{base64_image}"); let request = CreateChatCompletionRequestArgs::default() .model(system_settings.image_processing_model) diff --git a/ingestion-pipeline/src/utils/mod.rs b/ingestion-pipeline/src/utils/mod.rs index 0559323..0a0b5c2 100644 --- a/ingestion-pipeline/src/utils/mod.rs +++ b/ingestion-pipeline/src/utils/mod.rs @@ -15,13 +15,13 @@ pub struct GraphMapper { impl Default for GraphMapper { fn default() -> Self { - GraphMapper::new() + Self::new() } } impl GraphMapper { pub fn new() -> Self { - GraphMapper { + Self { key_to_id: HashMap::new(), } } @@ -36,8 +36,7 @@ impl GraphMapper { // If parsing fails, look it up in the map. self.key_to_id.get(key).copied().ok_or_else(|| { AppError::GraphMapper(format!( - "Key '{}' is not a valid UUID and was not found in the map.", - key + "Key '{key}' is not a valid UUID and was not found in the map." )) }) } @@ -54,6 +53,6 @@ impl GraphMapper { self.key_to_id .get(key) .copied() - .ok_or_else(|| AppError::GraphMapper(format!("Key '{}' not found in map.", key))) + .ok_or_else(|| AppError::GraphMapper(format!("Key '{key}' not found in map."))) } } diff --git a/ingestion-pipeline/src/utils/pdf_ingestion.rs b/ingestion-pipeline/src/utils/pdf_ingestion.rs index 9ee1688..efe09e6 100644 --- a/ingestion-pipeline/src/utils/pdf_ingestion.rs +++ b/ingestion-pipeline/src/utils/pdf_ingestion.rs @@ -118,7 +118,7 @@ async fn load_page_numbers(pdf_bytes: Vec) -> Result, AppError> { /// Uses the existing headless Chrome dependency to rasterize the requested PDF pages into PNGs. async fn render_pdf_pages(file_path: &Path, pages: &[u32]) -> Result>, AppError> { let file_url = url::Url::from_file_path(file_path) - .map_err(|_| AppError::Processing("Unable to construct PDF file URL".into()))?; + .map_err(|()| AppError::Processing("Unable to construct PDF file URL".into()))?; let browser = create_browser()?; let tab = browser @@ -133,8 +133,7 @@ async fn render_pdf_pages(file_path: &Path, pages: &[u32]) -> Result for (idx, page) in pages.iter().enumerate() { let target = format!( - "{}#page={}&toolbar=0&statusbar=0&zoom=page-fit", - file_url, page + "{file_url}#page={page}&toolbar=0&statusbar=0&zoom=page-fit" ); tab.navigate_to(&target) .map_err(|err| AppError::Processing(format!("Failed to navigate to PDF page: {err}")))? @@ -279,7 +278,7 @@ async fn vision_markdown( let mut markdown_sections = Vec::with_capacity(rendered_pages.len()); for (batch_idx, chunk) in rendered_pages.chunks(PAGES_PER_VISION_CHUNK).enumerate() { - let total_image_bytes: usize = chunk.iter().map(|bytes| bytes.len()).sum(); + let total_image_bytes: usize = chunk.iter().map(std::vec::Vec::len).sum(); debug!( batch = batch_idx, pages = chunk.len(), @@ -318,7 +317,7 @@ async fn vision_markdown( ); for encoded in &encoded_images { - let image_url = format!("data:image/png;base64,{}", encoded); + let image_url = format!("data:image/png;base64,{encoded}"); content_parts.push( ChatCompletionRequestMessageContentPartImageArgs::default() .image_url( @@ -413,7 +412,7 @@ fn looks_good_enough(text: &str) -> bool { return false; } - let ascii_chars = text.chars().filter(|c| c.is_ascii()).count() as f64; + let ascii_chars = text.chars().filter(char::is_ascii).count() as f64; let ascii_ratio = ascii_chars / total_chars; if ascii_ratio < FAST_PATH_MIN_ASCII_RATIO { return false; @@ -484,8 +483,7 @@ fn is_structural_line(line: &str) -> bool { || lowered .chars() .next() - .map(|c| c.is_ascii_digit()) - .unwrap_or(false) + .is_some_and(|c| c.is_ascii_digit()) && lowered.contains('.') } @@ -572,14 +570,13 @@ fn prepare_pdf_viewer(tab: &headless_chrome::Tab, page_number: u32) { const toolbar = app.shadowRoot.querySelector('#toolbar'); if (toolbar) {{ toolbar.style.display = 'none'; }} }} - const page = viewer.shadowRoot.querySelector('viewer-page:nth-of-type({page})'); + const page = viewer.shadowRoot.querySelector('viewer-page:nth-of-type({page_number})'); if (page && page.scrollIntoView) {{ page.scrollIntoView({{ block: 'start', inline: 'center' }}); }} - const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page}"]'); + const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page_number}"]'); return !!canvas; - }})()"#, - page = page_number + }})()"# ); match tab.evaluate(&script, false) { @@ -607,12 +604,11 @@ fn canvas_viewport_for_page( if (!embed || !embed.shadowRoot) return null; const viewer = embed.shadowRoot.querySelector('pdf-viewer'); if (!viewer || !viewer.shadowRoot) return null; - const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page}"]'); + const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page_number}"]'); if (!canvas) return null; const rect = canvas.getBoundingClientRect(); return {{ x: rect.x, y: rect.y, width: rect.width, height: rect.height }}; - }})()"#, - page = page_number + }})()"# ); let result = tab @@ -683,7 +679,7 @@ fn capture_full_page_png(tab: &headless_chrome::Tab) -> Result, AppError }) } -fn is_suspicious_image(len: usize) -> bool { +const fn is_suspicious_image(len: usize) -> bool { len < MIN_PAGE_IMAGE_BYTES } @@ -710,7 +706,7 @@ fn is_low_quality_response(content: &str) -> bool { lowered.contains("unable to") || lowered.contains("cannot") } -fn prompt_for_attempt(attempt: usize, base_prompt: &str) -> &str { +const fn prompt_for_attempt(attempt: usize, base_prompt: &str) -> &str { if attempt == 0 { base_prompt } else {