chore: clippy ingestion-pipeline

Per Stark
2025-10-16 20:36:39 +02:00
parent ab68bccb80
commit 3c97d8ead5
7 changed files with 28 additions and 35 deletions
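
Most of the changes below apply Clippy's `uninlined_format_args` lint: variables that were passed as trailing arguments to `format!` (and related macros) are moved into the format string as inline captures. A minimal sketch of the pattern, with illustrative variable names rather than any one call site from the diff:

fn describe(text: &str, category: &str) -> String {
    // Before: format!("content: {}, category: {}", text, category)
    // After: inline captures produce the same output.
    format!("content: {text}, category: {category}")
}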

View File

@@ -25,7 +25,7 @@ pub struct IngestionEnricher {
 }
 impl IngestionEnricher {
-    pub fn new(
+    pub const fn new(
         db_client: Arc<SurrealDbClient>,
         openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
     ) -> Self {
@@ -61,8 +61,7 @@ impl IngestionEnricher {
         user_id: &str,
     ) -> Result<Vec<RetrievedEntity>, AppError> {
         let input_text = format!(
-            "content: {}, category: {}, user_context: {:?}",
-            text, category, context
+            "content: {text}, category: {category}, user_context: {context:?}"
         );
         retrieve_entities(&self.db_client, &self.openai_client, &input_text, user_id).await
@@ -80,8 +79,7 @@ impl IngestionEnricher {
         let entities_json = format_entities_json(similar_entities);
         let user_message = format!(
-            "Category:\n{}\ncontext:\n{:?}\nContent:\n{}\nExisting KnowledgeEntities in database:\n{}",
-            category, context, text, entities_json
+            "Category:\n{category}\ncontext:\n{context:?}\nContent:\n{text}\nExisting KnowledgeEntities in database:\n{entities_json}"
         );
         debug!("Prepared LLM request message: {}", user_message);
@@ -122,7 +120,7 @@ impl IngestionEnricher {
         ))?;
         serde_json::from_str::<LLMEnrichmentResult>(content).map_err(|e| {
-            AppError::LLMParsing(format!("Failed to parse LLM response into analysis: {}", e))
+            AppError::LLMParsing(format!("Failed to parse LLM response into analysis: {e}"))
         })
     }
 }
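
The `pub const fn new` change looks like Clippy's `missing_const_for_fn`: a constructor whose body only moves its arguments into struct fields can be marked `const`. A rough sketch of that shape, using a stand-in `Db` type instead of the real `SurrealDbClient`:

use std::sync::Arc;

struct Db; // stand-in for the database client type

pub struct Enricher {
    db_client: Arc<Db>,
}

impl Enricher {
    // The body only moves `db_client` into the field, so `const fn` compiles;
    // existing callers are unaffected by the extra annotation.
    pub const fn new(db_client: Arc<Db>) -> Self {
        Self { db_client }
    }
}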

View File

@@ -68,7 +68,7 @@ impl IngestionPipeline {
             .await?;
         match self.process(&text_content).await {
-            Ok(_) => {
+            Ok(()) => {
                 processing_task.mark_succeeded(&self.db).await?;
                 info!(%task_id, attempt, "ingestion task succeeded");
                 Ok(())
@@ -173,7 +173,7 @@ impl IngestionPipeline {
         let entity_count = entities.len();
         let relationship_count = relationships.len();
-        const STORE_GRAPH_MUTATION: &str = r#"
+        const STORE_GRAPH_MUTATION: &str = r"
             BEGIN TRANSACTION;
             LET $entities = $entities;
             LET $relationships = $relationships;
@@ -192,7 +192,7 @@ impl IngestionPipeline {
             };
             COMMIT TRANSACTION;
-        "#;
+        ";
         const MAX_ATTEMPTS: usize = 3;
         const INITIAL_BACKOFF_MS: u64 = 50;
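
Two smaller cleanups in this file: the success arm is written as `Ok(())` so the match states explicitly that the value is unit, and the SurrealQL constant drops its hash guards because `r#"..."#` is only needed when the literal contains a double quote (Clippy's `needless_raw_string_hashes`). A small sketch of the raw-string rule, with made-up query text:

// Hash guards are required only when the string itself contains `"`:
const WITH_QUOTES: &str = r#"SELECT * FROM task WHERE status = "queued";"#;

// No `"` inside, so a plain raw string is enough:
const MUTATION: &str = r"
    BEGIN TRANSACTION;
    COMMIT TRANSACTION;
";

fn main() {
    println!("{WITH_QUOTES}\n{MUTATION}");
}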

View File

@@ -49,7 +49,7 @@ impl LLMEnrichmentResult {
     /// # Arguments
     ///
     /// * `source_id` - A UUID representing the source identifier.
-    /// * `openai_client` - OpenAI client for LLM calls.
+    /// * `openai_client` - `OpenAI` client for LLM calls.
     ///
     /// # Returns
     ///
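
The backticks around `OpenAI` are presumably Clippy's `doc_markdown` lint, which asks that identifier-like words in doc comments be wrapped in backticks so rustdoc renders them as code. A tiny illustration with a hypothetical function:

/// Builds the request that is sent to the `OpenAI` chat endpoint.
///
/// * `source_id` - A UUID identifying the source document.
pub fn describe_request(source_id: &str) -> String {
    format!("enrichment request for {source_id}")
}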

View File

@@ -4,7 +4,7 @@ use common::{
     storage::{db::SurrealDbClient, types::system_settings::SystemSettings},
 };
-/// Transcribes an audio file using the configured OpenAI Whisper model.
+/// Transcribes an audio file using the configured `OpenAI` Whisper model.
 pub async fn transcribe_audio_file(
     file_path: &str,
     db_client: &SurrealDbClient,
@@ -23,6 +23,6 @@ pub async fn transcribe_audio_file(
         .audio()
         .transcribe(request)
         .await
-        .map_err(|e| AppError::Processing(format!("Audio transcription failed: {}", e)))?;
+        .map_err(|e| AppError::Processing(format!("Audio transcription failed: {e}")))?;
     Ok(response.text)
 }

View File

@@ -19,7 +19,7 @@ pub async fn extract_text_from_image(
     let base64_image = STANDARD.encode(&image_bytes);
-    let image_url = format!("data:image/png;base64,{}", base64_image);
+    let image_url = format!("data:image/png;base64,{base64_image}");
     let request = CreateChatCompletionRequestArgs::default()
         .model(system_settings.image_processing_model)

View File

@@ -15,13 +15,13 @@ pub struct GraphMapper {
 impl Default for GraphMapper {
     fn default() -> Self {
-        GraphMapper::new()
+        Self::new()
     }
 }
 impl GraphMapper {
     pub fn new() -> Self {
-        GraphMapper {
+        Self {
             key_to_id: HashMap::new(),
         }
     }
@@ -36,8 +36,7 @@ impl GraphMapper {
         // If parsing fails, look it up in the map.
         self.key_to_id.get(key).copied().ok_or_else(|| {
             AppError::GraphMapper(format!(
-                "Key '{}' is not a valid UUID and was not found in the map.",
-                key
+                "Key '{key}' is not a valid UUID and was not found in the map."
             ))
         })
     }
@@ -54,6 +53,6 @@ impl GraphMapper {
         self.key_to_id
             .get(key)
             .copied()
-            .ok_or_else(|| AppError::GraphMapper(format!("Key '{}' not found in map.", key)))
+            .ok_or_else(|| AppError::GraphMapper(format!("Key '{key}' not found in map.")))
     }
 }
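
The `GraphMapper` edits are Clippy's `use_self`: inside an `impl` block, the type's own name is replaced with `Self`, which stays correct if the type is ever renamed. A condensed sketch of the same shape, with `u64` standing in for the stored id type:

use std::collections::HashMap;

pub struct GraphMapper {
    key_to_id: HashMap<String, u64>,
}

impl Default for GraphMapper {
    fn default() -> Self {
        Self::new() // rather than GraphMapper::new()
    }
}

impl GraphMapper {
    pub fn new() -> Self {
        // rather than GraphMapper { ... }
        Self {
            key_to_id: HashMap::new(),
        }
    }
}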

View File

@@ -118,7 +118,7 @@ async fn load_page_numbers(pdf_bytes: Vec<u8>) -> Result<Vec<u32>, AppError> {
 /// Uses the existing headless Chrome dependency to rasterize the requested PDF pages into PNGs.
 async fn render_pdf_pages(file_path: &Path, pages: &[u32]) -> Result<Vec<Vec<u8>>, AppError> {
     let file_url = url::Url::from_file_path(file_path)
-        .map_err(|_| AppError::Processing("Unable to construct PDF file URL".into()))?;
+        .map_err(|()| AppError::Processing("Unable to construct PDF file URL".into()))?;
     let browser = create_browser()?;
     let tab = browser
@@ -133,8 +133,7 @@ async fn render_pdf_pages(file_path: &Path, pages: &[u32]) -> Result<Vec<Vec<u8>
     for (idx, page) in pages.iter().enumerate() {
         let target = format!(
-            "{}#page={}&toolbar=0&statusbar=0&zoom=page-fit",
-            file_url, page
+            "{file_url}#page={page}&toolbar=0&statusbar=0&zoom=page-fit"
         );
         tab.navigate_to(&target)
             .map_err(|err| AppError::Processing(format!("Failed to navigate to PDF page: {err}")))?
@@ -279,7 +278,7 @@ async fn vision_markdown(
     let mut markdown_sections = Vec::with_capacity(rendered_pages.len());
     for (batch_idx, chunk) in rendered_pages.chunks(PAGES_PER_VISION_CHUNK).enumerate() {
-        let total_image_bytes: usize = chunk.iter().map(|bytes| bytes.len()).sum();
+        let total_image_bytes: usize = chunk.iter().map(std::vec::Vec::len).sum();
         debug!(
             batch = batch_idx,
             pages = chunk.len(),
@@ -318,7 +317,7 @@ async fn vision_markdown(
         );
         for encoded in &encoded_images {
-            let image_url = format!("data:image/png;base64,{}", encoded);
+            let image_url = format!("data:image/png;base64,{encoded}");
             content_parts.push(
                 ChatCompletionRequestMessageContentPartImageArgs::default()
                     .image_url(
@@ -413,7 +412,7 @@ fn looks_good_enough(text: &str) -> bool {
         return false;
     }
-    let ascii_chars = text.chars().filter(|c| c.is_ascii()).count() as f64;
+    let ascii_chars = text.chars().filter(char::is_ascii).count() as f64;
     let ascii_ratio = ascii_chars / total_chars;
     if ascii_ratio < FAST_PATH_MIN_ASCII_RATIO {
         return false;
@@ -484,8 +483,7 @@ fn is_structural_line(line: &str) -> bool {
         || lowered
             .chars()
             .next()
-            .map(|c| c.is_ascii_digit())
-            .unwrap_or(false)
+            .is_some_and(|c| c.is_ascii_digit())
             && lowered.contains('.')
 }
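
The `.map(|c| c.is_ascii_digit()).unwrap_or(false)` rewrite above is the `map_unwrap_or` family of lints: `Option::is_some_and` (stable since Rust 1.70) expresses "a value exists and satisfies the predicate" in one call. The neighbouring `|_|` to `|()|` and closure-to-method-path changes appear to be `ignored_unit_patterns` and `redundant_closure_for_method_calls`. A small sketch of the `is_some_and` pattern:

fn starts_with_digit(line: &str) -> bool {
    // Equivalent to: line.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false)
    line.chars().next().is_some_and(|c| c.is_ascii_digit())
}

fn main() {
    assert!(starts_with_digit("1. Introduction"));
    assert!(!starts_with_digit("Appendix"));
}
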
@@ -572,14 +570,13 @@ fn prepare_pdf_viewer(tab: &headless_chrome::Tab, page_number: u32) {
                 const toolbar = app.shadowRoot.querySelector('#toolbar');
                 if (toolbar) {{ toolbar.style.display = 'none'; }}
             }}
-            const page = viewer.shadowRoot.querySelector('viewer-page:nth-of-type({page})');
+            const page = viewer.shadowRoot.querySelector('viewer-page:nth-of-type({page_number})');
             if (page && page.scrollIntoView) {{
                 page.scrollIntoView({{ block: 'start', inline: 'center' }});
             }}
-            const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page}"]');
+            const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page_number}"]');
             return !!canvas;
-        }})()"#,
-        page = page_number
+        }})()"#
     );
     match tab.evaluate(&script, false) {
@@ -607,12 +604,11 @@ fn canvas_viewport_for_page(
             if (!embed || !embed.shadowRoot) return null;
             const viewer = embed.shadowRoot.querySelector('pdf-viewer');
             if (!viewer || !viewer.shadowRoot) return null;
-            const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page}"]');
+            const canvas = viewer.shadowRoot.querySelector('canvas[aria-label="Page {page_number}"]');
             if (!canvas) return null;
             const rect = canvas.getBoundingClientRect();
             return {{ x: rect.x, y: rect.y, width: rect.width, height: rect.height }};
-        }})()"#,
-        page = page_number
+        }})()"#
     );
     let result = tab
@@ -683,7 +679,7 @@ fn capture_full_page_png(tab: &headless_chrome::Tab) -> Result<Vec<u8>, AppError
     })
 }
-fn is_suspicious_image(len: usize) -> bool {
+const fn is_suspicious_image(len: usize) -> bool {
     len < MIN_PAGE_IMAGE_BYTES
 }
@@ -710,7 +706,7 @@ fn is_low_quality_response(content: &str) -> bool {
     lowered.contains("unable to") || lowered.contains("cannot")
 }
-fn prompt_for_attempt(attempt: usize, base_prompt: &str) -> &str {
+const fn prompt_for_attempt(attempt: usize, base_prompt: &str) -> &str {
     if attempt == 0 {
         base_prompt
     } else {