mirror of
https://github.com/perstarkse/minne.git
synced 2026-06-29 05:16:26 +02:00
chore: additional clippy fixes after rebasing
This commit is contained in:
@@ -108,7 +108,15 @@ struct IngestionStats {
|
||||
negative_ingested: usize,
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[allow(
|
||||
clippy::too_many_arguments,
|
||||
clippy::too_many_lines,
|
||||
clippy::cast_possible_truncation,
|
||||
clippy::cast_sign_loss,
|
||||
clippy::cast_precision_loss,
|
||||
clippy::arithmetic_side_effects,
|
||||
clippy::indexing_slicing
|
||||
)]
|
||||
pub async fn ensure_corpus(
|
||||
dataset: &ConvertedDataset,
|
||||
slice: &ResolvedSlice<'_>,
|
||||
@@ -167,7 +175,7 @@ pub async fn ensure_corpus(
|
||||
.paragraphs
|
||||
.get(idx)
|
||||
.copied()
|
||||
.ok_or_else(|| anyhow!("slice missing paragraph index {}", idx))?;
|
||||
.ok_or_else(|| anyhow!("slice missing paragraph index {idx}"))?;
|
||||
plan.push(ParagraphPlan {
|
||||
slot: plan.len(),
|
||||
entry,
|
||||
@@ -236,8 +244,8 @@ pub async fn ensure_corpus(
|
||||
let new_shards = ingest_paragraph_batch(
|
||||
dataset,
|
||||
&ingest_requests,
|
||||
embedding.clone(),
|
||||
openai.clone(),
|
||||
Arc::clone(&embedding),
|
||||
Arc::clone(&openai),
|
||||
user_id,
|
||||
&ingestion_fingerprint,
|
||||
&embedding_backend_label,
|
||||
@@ -265,10 +273,10 @@ pub async fn ensure_corpus(
|
||||
.context("shard record missing after ingestion run")?;
|
||||
if cache.refresh_embeddings_only || shard_record.needs_reembed {
|
||||
// Embeddings are now generated by the pipeline using FastEmbed - no need to re-embed
|
||||
shard_record.shard.ingestion_fingerprint = ingestion_fingerprint.clone();
|
||||
shard_record.shard.ingestion_fingerprint.clone_from(&ingestion_fingerprint);
|
||||
shard_record.shard.ingested_at = Utc::now();
|
||||
shard_record.shard.embedding_backend = embedding_backend_label.clone();
|
||||
shard_record.shard.embedding_model = embedding_model_code.clone();
|
||||
shard_record.shard.embedding_backend.clone_from(&embedding_backend_label);
|
||||
shard_record.shard.embedding_model.clone_from(&embedding_model_code);
|
||||
shard_record.shard.embedding_dimension = embedding_dimension;
|
||||
shard_record.dirty = true;
|
||||
shard_record.needs_reembed = false;
|
||||
@@ -282,7 +290,7 @@ pub async fn ensure_corpus(
|
||||
|
||||
let mut corpus_paragraphs = Vec::with_capacity(plan.len());
|
||||
for record in &records {
|
||||
let shard = &record.as_ref().expect("record missing").shard;
|
||||
let shard = &record.as_ref().context("record missing")?.shard;
|
||||
corpus_paragraphs.push(shard.to_corpus_paragraph());
|
||||
}
|
||||
|
||||
@@ -438,11 +446,11 @@ async fn ingest_paragraph_batch(
|
||||
let pipeline_config = ingestion_config.clone();
|
||||
let pipeline = IngestionPipeline::new_with_config(
|
||||
db,
|
||||
openai.clone(),
|
||||
Arc::clone(&openai),
|
||||
app_config,
|
||||
None::<Arc<retrieval_pipeline::reranking::RerankerPool>>,
|
||||
storage,
|
||||
embedding.clone(),
|
||||
Arc::clone(&embedding),
|
||||
pipeline_config,
|
||||
)?;
|
||||
let pipeline = Arc::new(pipeline);
|
||||
@@ -458,11 +466,11 @@ async fn ingest_paragraph_batch(
|
||||
);
|
||||
let model_clone = embedding_model.clone();
|
||||
let backend_clone = embedding_backend.to_string();
|
||||
let pipeline_clone = pipeline.clone();
|
||||
let pipeline_clone = Arc::clone(&pipeline);
|
||||
let category_clone = category.clone();
|
||||
let tasks = batch.iter().cloned().map(move |request| {
|
||||
ingest_single_paragraph(
|
||||
pipeline_clone.clone(),
|
||||
Arc::clone(&pipeline_clone),
|
||||
request,
|
||||
category_clone.clone(),
|
||||
user_id,
|
||||
@@ -684,6 +692,7 @@ pub fn corpus_handle_from_manifest(manifest: CorpusManifest, base_dir: PathBuf)
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::indexing_slicing)]
|
||||
fn compute_file_checksum(path: &Path) -> Result<String> {
|
||||
let mut file = fs::File::open(path)
|
||||
.with_context(|| format!("opening file {} for checksum", path.display()))?;
|
||||
@@ -736,7 +745,8 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn dummy_slice<'a>(dataset: &'a ConvertedDataset) -> ResolvedSlice<'a> {
|
||||
#[allow(clippy::too_many_lines, clippy::indexing_slicing)]
|
||||
fn dummy_slice(dataset: &ConvertedDataset) -> ResolvedSlice<'_> {
|
||||
let paragraph = &dataset.paragraphs[0];
|
||||
let question = ¶graph.questions[0];
|
||||
let manifest = SliceManifest {
|
||||
|
||||
Reference in New Issue
Block a user