chore: additional clippy fixes after rebasing

2026-06-29 05:16:26 +02:00 · 2026-05-27 07:37:18 +02:00
parent 293440b0ee
commit 414d2f5b34
39 changed files with 321 additions and 402 deletions
@@ -108,7 +108,15 @@ struct IngestionStats {
    negative_ingested: usize,
 }

-#[allow(clippy::too_many_arguments)]
+#[allow(
+    clippy::too_many_arguments,
+    clippy::too_many_lines,
+    clippy::cast_possible_truncation,
+    clippy::cast_sign_loss,
+    clippy::cast_precision_loss,
+    clippy::arithmetic_side_effects,
+    clippy::indexing_slicing
+)]
 pub async fn ensure_corpus(
    dataset: &ConvertedDataset,
    slice: &ResolvedSlice<'_>,
@@ -167,7 +175,7 @@ pub async fn ensure_corpus(
                .paragraphs
                .get(idx)
                .copied()
-                .ok_or_else(|| anyhow!("slice missing paragraph index {}", idx))?;
+                .ok_or_else(|| anyhow!("slice missing paragraph index {idx}"))?;
            plan.push(ParagraphPlan {
                slot: plan.len(),
                entry,
@@ -236,8 +244,8 @@ pub async fn ensure_corpus(
        let new_shards = ingest_paragraph_batch(
            dataset,
            &ingest_requests,
-            embedding.clone(),
-            openai.clone(),
+            Arc::clone(&embedding),
+            Arc::clone(&openai),
            user_id,
            &ingestion_fingerprint,
            &embedding_backend_label,
@@ -265,10 +273,10 @@ pub async fn ensure_corpus(
            .context("shard record missing after ingestion run")?;
        if cache.refresh_embeddings_only || shard_record.needs_reembed {
            // Embeddings are now generated by the pipeline using FastEmbed - no need to re-embed
-            shard_record.shard.ingestion_fingerprint = ingestion_fingerprint.clone();
+            shard_record.shard.ingestion_fingerprint.clone_from(&ingestion_fingerprint);
            shard_record.shard.ingested_at = Utc::now();
-            shard_record.shard.embedding_backend = embedding_backend_label.clone();
-            shard_record.shard.embedding_model = embedding_model_code.clone();
+            shard_record.shard.embedding_backend.clone_from(&embedding_backend_label);
+            shard_record.shard.embedding_model.clone_from(&embedding_model_code);
            shard_record.shard.embedding_dimension = embedding_dimension;
            shard_record.dirty = true;
            shard_record.needs_reembed = false;
@@ -282,7 +290,7 @@ pub async fn ensure_corpus(

    let mut corpus_paragraphs = Vec::with_capacity(plan.len());
    for record in &records {
-        let shard = &record.as_ref().expect("record missing").shard;
+        let shard = &record.as_ref().context("record missing")?.shard;
        corpus_paragraphs.push(shard.to_corpus_paragraph());
    }

@@ -438,11 +446,11 @@ async fn ingest_paragraph_batch(
    let pipeline_config = ingestion_config.clone();
    let pipeline = IngestionPipeline::new_with_config(
        db,
-        openai.clone(),
+        Arc::clone(&openai),
        app_config,
        None::<Arc<retrieval_pipeline::reranking::RerankerPool>>,
        storage,
-        embedding.clone(),
+        Arc::clone(&embedding),
        pipeline_config,
    )?;
    let pipeline = Arc::new(pipeline);
@@ -458,11 +466,11 @@ async fn ingest_paragraph_batch(
        );
        let model_clone = embedding_model.clone();
        let backend_clone = embedding_backend.to_string();
-        let pipeline_clone = pipeline.clone();
+        let pipeline_clone = Arc::clone(&pipeline);
        let category_clone = category.clone();
        let tasks = batch.iter().cloned().map(move |request| {
            ingest_single_paragraph(
-                pipeline_clone.clone(),
+                Arc::clone(&pipeline_clone),
                request,
                category_clone.clone(),
                user_id,
@@ -684,6 +692,7 @@ pub fn corpus_handle_from_manifest(manifest: CorpusManifest, base_dir: PathBuf)
    }
 }

+#[allow(clippy::indexing_slicing)]
 fn compute_file_checksum(path: &Path) -> Result<String> {
    let mut file = fs::File::open(path)
        .with_context(|| format!("opening file {} for checksum", path.display()))?;
@@ -736,7 +745,8 @@ mod tests {
        }
    }

-    fn dummy_slice<'a>(dataset: &'a ConvertedDataset) -> ResolvedSlice<'a> {
+    #[allow(clippy::too_many_lines, clippy::indexing_slicing)]
+    fn dummy_slice(dataset: &ConvertedDataset) -> ResolvedSlice<'_> {
        let paragraph = &dataset.paragraphs[0];
        let question = &paragraph.questions[0];
        let manifest = SliceManifest {