mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-28 02:19:34 +02:00
clippy: adhere to pedantic clippy, uniform test error handling
This commit is contained in:
@@ -14,7 +14,7 @@ use common::utils::config::get_config;
|
||||
use common::{
|
||||
storage::{
|
||||
db::SurrealDbClient,
|
||||
store::{DynStore, StorageManager},
|
||||
store::{DynStorage, StorageManager},
|
||||
types::{ingestion_payload::IngestionPayload, ingestion_task::IngestionTask, StoredObject},
|
||||
},
|
||||
utils::config::{AppConfig, StorageKind},
|
||||
@@ -432,7 +432,7 @@ async fn ingest_paragraph_batch(
|
||||
storage: StorageKind::Memory,
|
||||
..Default::default()
|
||||
};
|
||||
let backend: DynStore = Arc::new(InMemory::new());
|
||||
let backend: DynStorage = Arc::new(InMemory::new());
|
||||
let storage = StorageManager::with_backend(backend, StorageKind::Memory);
|
||||
|
||||
let pipeline_config = ingestion_config.clone();
|
||||
|
||||
@@ -861,7 +861,7 @@ mod tests {
|
||||
let question = CorpusQuestion {
|
||||
question_id: "q1".to_string(),
|
||||
paragraph_id: paragraph_one.paragraph_id.clone(),
|
||||
text_content_id: text_content_id,
|
||||
text_content_id,
|
||||
question_text: "What is this?".to_string(),
|
||||
answers: vec!["Hello".to_string()],
|
||||
is_impossible: false,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use anyhow::{Context, Result};
|
||||
use common::storage::{db::SurrealDbClient, indexes::ensure_runtime_indexes};
|
||||
use common::storage::{db::SurrealDbClient, indexes::ensure_runtime};
|
||||
use tracing::info;
|
||||
|
||||
// Helper functions for index management during namespace reseed
|
||||
@@ -11,7 +11,7 @@ pub async fn remove_all_indexes(db: &SurrealDbClient) -> Result<()> {
|
||||
|
||||
pub async fn recreate_indexes(db: &SurrealDbClient, dimension: usize) -> Result<()> {
|
||||
info!("Recreating ALL indexes after namespace reseed via shared runtime helper");
|
||||
ensure_runtime_indexes(db, dimension)
|
||||
ensure_runtime(db, dimension)
|
||||
.await
|
||||
.context("creating runtime indexes")
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ use common::{
|
||||
utils::embedding::EmbeddingProvider,
|
||||
};
|
||||
use retrieval_pipeline::{
|
||||
pipeline::{PipelineStageTimings, RetrievalConfig},
|
||||
pipeline::{StageTimings, RetrievalConfig},
|
||||
reranking::RerankerPool,
|
||||
};
|
||||
|
||||
@@ -56,7 +56,7 @@ pub(super) struct EvaluationContext<'a> {
|
||||
pub corpus_handle: Option<corpus::CorpusHandle>,
|
||||
pub cases: Vec<SeededCase>,
|
||||
pub filtered_questions: usize,
|
||||
pub stage_latency_samples: Vec<PipelineStageTimings>,
|
||||
pub stage_latency_samples: Vec<StageTimings>,
|
||||
pub latencies: Vec<u128>,
|
||||
pub diagnostics_output: Vec<CaseDiagnostics>,
|
||||
pub query_summaries: Vec<CaseSummary>,
|
||||
|
||||
@@ -10,7 +10,7 @@ use crate::eval::{
|
||||
CaseSummary, RetrievedSummary,
|
||||
};
|
||||
use retrieval_pipeline::{
|
||||
pipeline::{self, PipelineStageTimings, RetrievalConfig},
|
||||
pipeline::{self, StageTimings, RetrievalConfig},
|
||||
reranking::RerankerPool,
|
||||
};
|
||||
use tokio::sync::Semaphore;
|
||||
@@ -75,10 +75,10 @@ pub(crate) async fn run_queries(
|
||||
retrieval_config.tuning.chunk_rrf_fts_weight = value;
|
||||
}
|
||||
if let Some(value) = config.retrieval.chunk_rrf_use_vector {
|
||||
retrieval_config.tuning.chunk_rrf_use_vector = value;
|
||||
retrieval_config.tuning.flags.chunk_rrf_use_vector = value.into();
|
||||
}
|
||||
if let Some(value) = config.retrieval.chunk_rrf_use_fts {
|
||||
retrieval_config.tuning.chunk_rrf_use_fts = value;
|
||||
retrieval_config.tuning.flags.chunk_rrf_use_fts = value.into();
|
||||
}
|
||||
if let Some(value) = config.retrieval.chunk_avg_chars_per_token {
|
||||
retrieval_config.tuning.avg_chars_per_token = value;
|
||||
@@ -113,8 +113,8 @@ pub(crate) async fn run_queries(
|
||||
chunk_rrf_k = active_tuning.chunk_rrf_k,
|
||||
chunk_rrf_vector_weight = active_tuning.chunk_rrf_vector_weight,
|
||||
chunk_rrf_fts_weight = active_tuning.chunk_rrf_fts_weight,
|
||||
chunk_rrf_use_vector = active_tuning.chunk_rrf_use_vector,
|
||||
chunk_rrf_use_fts = active_tuning.chunk_rrf_use_fts,
|
||||
chunk_rrf_use_vector = active_tuning.flags.chunk_rrf_use_vector.as_bool(),
|
||||
chunk_rrf_use_fts = active_tuning.flags.chunk_rrf_use_fts.as_bool(),
|
||||
embedding_backend = ctx.embedding_provider().backend_label(),
|
||||
embedding_model = ctx
|
||||
.embedding_provider()
|
||||
@@ -181,35 +181,32 @@ pub(crate) async fn run_queries(
|
||||
embedding_provider.embed(&question).await.with_context(|| {
|
||||
format!("generating embedding for question {}", question_id)
|
||||
})?;
|
||||
let reranker = match &rerank_pool {
|
||||
Some(pool) => Some(pool.checkout().await),
|
||||
let reranker = match rerank_pool.as_ref() {
|
||||
Some(pool) => pool.checkout().await,
|
||||
None => None,
|
||||
};
|
||||
|
||||
let params = pipeline::StrategyParams {
|
||||
db_client: &db,
|
||||
openai_client: &openai_client,
|
||||
embedding_provider: Some(&embedding_provider),
|
||||
input_text: &question,
|
||||
user_id: &user_id,
|
||||
config: (*retrieval_config).clone(),
|
||||
reranker,
|
||||
};
|
||||
let (result_output, pipeline_diagnostics, stage_timings) = if diagnostics_enabled {
|
||||
let outcome = pipeline::run_pipeline_with_embedding_with_diagnostics(
|
||||
&db,
|
||||
&openai_client,
|
||||
Some(&embedding_provider),
|
||||
params,
|
||||
query_embedding,
|
||||
&question,
|
||||
&user_id,
|
||||
(*retrieval_config).clone(),
|
||||
reranker,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("running pipeline for question {}", question_id))?;
|
||||
(outcome.results, outcome.diagnostics, outcome.stage_timings)
|
||||
} else {
|
||||
let outcome = pipeline::run_pipeline_with_embedding_with_metrics(
|
||||
&db,
|
||||
&openai_client,
|
||||
Some(&embedding_provider),
|
||||
params,
|
||||
query_embedding,
|
||||
&question,
|
||||
&user_id,
|
||||
(*retrieval_config).clone(),
|
||||
reranker,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("running pipeline for question {}", question_id))?;
|
||||
@@ -327,7 +324,7 @@ pub(crate) async fn run_queries(
|
||||
usize,
|
||||
CaseSummary,
|
||||
Option<CaseDiagnostics>,
|
||||
PipelineStageTimings,
|
||||
StageTimings,
|
||||
),
|
||||
anyhow::Error,
|
||||
>((idx, summary, diagnostics, stage_timings))
|
||||
|
||||
@@ -205,8 +205,8 @@ pub(crate) async fn summarize(
|
||||
chunk_rrf_k: active_tuning.chunk_rrf_k,
|
||||
chunk_rrf_vector_weight: active_tuning.chunk_rrf_vector_weight,
|
||||
chunk_rrf_fts_weight: active_tuning.chunk_rrf_fts_weight,
|
||||
chunk_rrf_use_vector: active_tuning.chunk_rrf_use_vector,
|
||||
chunk_rrf_use_fts: active_tuning.chunk_rrf_use_fts,
|
||||
chunk_rrf_use_vector: active_tuning.flags.chunk_rrf_use_vector.as_bool(),
|
||||
chunk_rrf_use_fts: active_tuning.flags.chunk_rrf_use_fts.as_bool(),
|
||||
ingest_chunk_min_tokens: config.ingest.ingest_chunk_min_tokens,
|
||||
ingest_chunk_max_tokens: config.ingest.ingest_chunk_max_tokens,
|
||||
ingest_chunks_only: config.ingest.ingest_chunks_only,
|
||||
|
||||
+25
-27
@@ -1037,6 +1037,31 @@ fn write_manifest(path: &Path, manifest: &SliceManifest) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
use crate::args::Config;
|
||||
|
||||
impl<'a> From<&'a Config> for SliceConfig<'a> {
|
||||
fn from(config: &'a Config) -> Self {
|
||||
slice_config_with_limit(config, None)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn slice_config_with_limit<'a>(
|
||||
config: &'a Config,
|
||||
limit_override: Option<usize>,
|
||||
) -> SliceConfig<'a> {
|
||||
SliceConfig {
|
||||
cache_dir: config.cache_dir.as_path(),
|
||||
force_convert: config.force_convert,
|
||||
explicit_slice: config.slice.as_deref(),
|
||||
limit: limit_override.or(config.limit),
|
||||
corpus_limit: config.corpus_limit,
|
||||
slice_seed: config.slice_seed,
|
||||
llm_mode: config.llm_mode,
|
||||
negative_multiplier: config.negative_multiplier,
|
||||
require_verified_chunks: config.retrieval.require_verified_chunks,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -1214,30 +1239,3 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Config integration (merged from slice.rs)
|
||||
|
||||
use crate::args::Config;
|
||||
|
||||
impl<'a> From<&'a Config> for SliceConfig<'a> {
|
||||
fn from(config: &'a Config) -> Self {
|
||||
slice_config_with_limit(config, None)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn slice_config_with_limit<'a>(
|
||||
config: &'a Config,
|
||||
limit_override: Option<usize>,
|
||||
) -> SliceConfig<'a> {
|
||||
SliceConfig {
|
||||
cache_dir: config.cache_dir.as_path(),
|
||||
force_convert: config.force_convert,
|
||||
explicit_slice: config.slice.as_deref(),
|
||||
limit: limit_override.or(config.limit),
|
||||
corpus_limit: config.corpus_limit,
|
||||
slice_seed: config.slice_seed,
|
||||
llm_mode: config.llm_mode,
|
||||
negative_multiplier: config.negative_multiplier,
|
||||
require_verified_chunks: config.retrieval.require_verified_chunks,
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user