chore: additional clippy fixes after rebasing

This commit is contained in:
Per Stark
2026-05-27 07:37:18 +02:00
parent 293440b0ee
commit 414d2f5b34
39 changed files with 321 additions and 402 deletions
+23 -13
View File
@@ -108,7 +108,15 @@ struct IngestionStats {
negative_ingested: usize,
}
#[allow(clippy::too_many_arguments)]
#[allow(
clippy::too_many_arguments,
clippy::too_many_lines,
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
clippy::cast_precision_loss,
clippy::arithmetic_side_effects,
clippy::indexing_slicing
)]
pub async fn ensure_corpus(
dataset: &ConvertedDataset,
slice: &ResolvedSlice<'_>,
@@ -167,7 +175,7 @@ pub async fn ensure_corpus(
.paragraphs
.get(idx)
.copied()
.ok_or_else(|| anyhow!("slice missing paragraph index {}", idx))?;
.ok_or_else(|| anyhow!("slice missing paragraph index {idx}"))?;
plan.push(ParagraphPlan {
slot: plan.len(),
entry,
@@ -236,8 +244,8 @@ pub async fn ensure_corpus(
let new_shards = ingest_paragraph_batch(
dataset,
&ingest_requests,
embedding.clone(),
openai.clone(),
Arc::clone(&embedding),
Arc::clone(&openai),
user_id,
&ingestion_fingerprint,
&embedding_backend_label,
@@ -265,10 +273,10 @@ pub async fn ensure_corpus(
.context("shard record missing after ingestion run")?;
if cache.refresh_embeddings_only || shard_record.needs_reembed {
// Embeddings are now generated by the pipeline using FastEmbed - no need to re-embed
shard_record.shard.ingestion_fingerprint = ingestion_fingerprint.clone();
shard_record.shard.ingestion_fingerprint.clone_from(&ingestion_fingerprint);
shard_record.shard.ingested_at = Utc::now();
shard_record.shard.embedding_backend = embedding_backend_label.clone();
shard_record.shard.embedding_model = embedding_model_code.clone();
shard_record.shard.embedding_backend.clone_from(&embedding_backend_label);
shard_record.shard.embedding_model.clone_from(&embedding_model_code);
shard_record.shard.embedding_dimension = embedding_dimension;
shard_record.dirty = true;
shard_record.needs_reembed = false;
@@ -282,7 +290,7 @@ pub async fn ensure_corpus(
let mut corpus_paragraphs = Vec::with_capacity(plan.len());
for record in &records {
let shard = &record.as_ref().expect("record missing").shard;
let shard = &record.as_ref().context("record missing")?.shard;
corpus_paragraphs.push(shard.to_corpus_paragraph());
}
@@ -438,11 +446,11 @@ async fn ingest_paragraph_batch(
let pipeline_config = ingestion_config.clone();
let pipeline = IngestionPipeline::new_with_config(
db,
openai.clone(),
Arc::clone(&openai),
app_config,
None::<Arc<retrieval_pipeline::reranking::RerankerPool>>,
storage,
embedding.clone(),
Arc::clone(&embedding),
pipeline_config,
)?;
let pipeline = Arc::new(pipeline);
@@ -458,11 +466,11 @@ async fn ingest_paragraph_batch(
);
let model_clone = embedding_model.clone();
let backend_clone = embedding_backend.to_string();
let pipeline_clone = pipeline.clone();
let pipeline_clone = Arc::clone(&pipeline);
let category_clone = category.clone();
let tasks = batch.iter().cloned().map(move |request| {
ingest_single_paragraph(
pipeline_clone.clone(),
Arc::clone(&pipeline_clone),
request,
category_clone.clone(),
user_id,
@@ -684,6 +692,7 @@ pub fn corpus_handle_from_manifest(manifest: CorpusManifest, base_dir: PathBuf)
}
}
#[allow(clippy::indexing_slicing)]
fn compute_file_checksum(path: &Path) -> Result<String> {
let mut file = fs::File::open(path)
.with_context(|| format!("opening file {} for checksum", path.display()))?;
@@ -736,7 +745,8 @@ mod tests {
}
}
fn dummy_slice<'a>(dataset: &'a ConvertedDataset) -> ResolvedSlice<'a> {
#[allow(clippy::too_many_lines, clippy::indexing_slicing)]
fn dummy_slice(dataset: &ConvertedDataset) -> ResolvedSlice<'_> {
let paragraph = &dataset.paragraphs[0];
let question = &paragraph.questions[0];
let manifest = SliceManifest {