chore: additional clippy fixes after rebasing

This commit is contained in:
Per Stark
2026-05-27 07:37:18 +02:00
parent 293440b0ee
commit 414d2f5b34
39 changed files with 321 additions and 402 deletions
+20 -20
View File
@@ -4,6 +4,7 @@ use std::{
time::{Duration, Instant},
};
use anyhow::{anyhow, Result};
use async_openai::Client;
use common::{
storage::{
@@ -26,6 +27,7 @@ use crate::{
slice, snapshot,
};
#[allow(clippy::struct_excessive_bools)]
pub(super) struct EvaluationContext<'a> {
dataset: &'a ConvertedDataset,
config: &'a Config,
@@ -119,41 +121,39 @@ impl<'a> EvaluationContext<'a> {
self.config
}
pub fn slice(&self) -> &slice::ResolvedSlice<'a> {
self.slice.as_ref().expect("slice has not been prepared")
pub fn slice(&self) -> Result<&slice::ResolvedSlice<'a>> {
self.slice.as_ref().ok_or_else(|| anyhow!("slice has not been prepared"))
}
pub fn db(&self) -> &SurrealDbClient {
self.db.as_ref().expect("database connection missing")
pub fn db(&self) -> Result<&SurrealDbClient> {
self.db.as_ref().ok_or_else(|| anyhow!("database connection missing"))
}
pub fn descriptor(&self) -> &snapshot::Descriptor {
pub fn descriptor(&self) -> Result<&snapshot::Descriptor> {
self.descriptor
.as_ref()
.expect("snapshot descriptor unavailable")
.ok_or_else(|| anyhow!("snapshot descriptor unavailable"))
}
pub fn embedding_provider(&self) -> &EmbeddingProvider {
pub fn embedding_provider(&self) -> Result<&EmbeddingProvider> {
self.embedding_provider
.as_ref()
.expect("embedding provider not initialised")
.ok_or_else(|| anyhow!("embedding provider not initialised"))
}
pub fn openai_client(&self) -> Arc<Client<async_openai::config::OpenAIConfig>> {
self.openai_client
.as_ref()
.expect("openai client missing")
.clone()
pub fn openai_client(&self) -> Result<Arc<Client<async_openai::config::OpenAIConfig>>> {
Ok(Arc::clone(self.openai_client.as_ref().ok_or_else(|| anyhow!("openai client missing"))?))
}
pub fn corpus_handle(&self) -> &corpus::CorpusHandle {
self.corpus_handle.as_ref().expect("corpus handle missing")
pub fn corpus_handle(&self) -> Result<&corpus::CorpusHandle> {
self.corpus_handle.as_ref().ok_or_else(|| anyhow!("corpus handle missing"))
}
pub fn evaluation_user(&self) -> &User {
self.eval_user.as_ref().expect("evaluation user missing")
pub fn evaluation_user(&self) -> Result<&User> {
self.eval_user.as_ref().ok_or_else(|| anyhow!("evaluation user missing"))
}
#[allow(clippy::arithmetic_side_effects)]
pub fn record_stage_duration(&mut self, stage: EvalStage, duration: Duration) {
let elapsed = duration.as_millis();
match stage {
@@ -167,8 +167,8 @@ impl<'a> EvaluationContext<'a> {
}
}
pub fn into_summary(self) -> EvaluationSummary {
self.summary.expect("evaluation summary missing")
pub fn into_summary(self) -> Result<EvaluationSummary> {
self.summary.ok_or_else(|| anyhow!("evaluation summary missing"))
}
}
@@ -184,7 +184,7 @@ pub(super) enum EvalStage {
}
impl EvalStage {
pub fn label(&self) -> &'static str {
pub fn label(self) -> &'static str {
match self {
EvalStage::PrepareSlice => "prepare-slice",
EvalStage::PrepareDb => "prepare-db",
+1 -1
View File
@@ -23,5 +23,5 @@ pub async fn run_evaluation(
let machine = stages::summarize(machine, &mut ctx).await?;
let _ = stages::finalize(machine, &mut ctx).await?;
Ok(ctx.into_summary())
ctx.into_summary()
}
+1 -1
View File
@@ -55,5 +55,5 @@ pub(crate) async fn finalize(
machine
.finalize()
.map_err(|(_, guard)| map_guard_error("finalize", guard))
.map_err(|(_, guard)| map_guard_error("finalize", &guard))
}
+1 -1
View File
@@ -19,7 +19,7 @@ use state_machines::core::GuardError;
use super::state::EvaluationMachine;
fn map_guard_error(event: &str, guard: GuardError) -> anyhow::Error {
fn map_guard_error(event: &str, guard: &GuardError) -> anyhow::Error {
anyhow::anyhow!("invalid evaluation pipeline transition during {event}: {guard:?}")
}
@@ -11,6 +11,7 @@ use super::super::{
};
use super::{map_guard_error, StageResult};
#[allow(clippy::too_many_lines)]
pub(crate) async fn prepare_corpus(
machine: EvaluationMachine<(), DbReady>,
ctx: &mut EvaluationContext<'_>,
@@ -24,13 +25,13 @@ pub(crate) async fn prepare_corpus(
let config = ctx.config();
let cache_settings = corpus::CorpusCacheConfig::from(config);
let embedding_provider = ctx.embedding_provider().clone();
let openai_client = ctx.openai_client();
let slice = ctx.slice();
let embedding_provider = ctx.embedding_provider()?.clone();
let openai_client = ctx.openai_client()?;
let slice = ctx.slice()?;
let window = slice::select_window(slice, ctx.config().slice_offset, ctx.config().limit)
.context("selecting slice window for corpus preparation")?;
let descriptor = snapshot::Descriptor::new(config, slice, ctx.embedding_provider());
let descriptor = snapshot::Descriptor::new(config, slice, ctx.embedding_provider()?);
let ingestion_config = corpus::make_ingestion_config(config);
let expected_fingerprint = corpus::compute_ingestion_fingerprint(
ctx.dataset(),
@@ -47,7 +48,7 @@ pub(crate) async fn prepare_corpus(
if !config.reseed_slice {
let requested_cases = window.cases.len();
if can_reuse_namespace(
ctx.db(),
ctx.db()?,
&descriptor,
&ctx.namespace,
&ctx.database,
@@ -81,7 +82,7 @@ pub(crate) async fn prepare_corpus(
return machine
.prepare_corpus()
.map_err(|(_, guard)| map_guard_error("prepare_corpus", guard));
.map_err(|(_, guard)| map_guard_error("prepare_corpus", &guard));
}
info!(
cache = %base_dir.display(),
@@ -137,5 +138,5 @@ pub(crate) async fn prepare_corpus(
machine
.prepare_corpus()
.map_err(|(_, guard)| map_guard_error("prepare_corpus", guard))
.map_err(|(_, guard)| map_guard_error("prepare_corpus", &guard))
}
@@ -117,5 +117,5 @@ pub(crate) async fn prepare_db(
machine
.prepare_db()
.map_err(|(_, guard)| map_guard_error("prepare_db", guard))
.map_err(|(_, guard)| map_guard_error("prepare_db", &guard))
}
@@ -19,6 +19,7 @@ use super::super::{
};
use super::{map_guard_error, StageResult};
#[allow(clippy::too_many_lines)]
pub(crate) async fn prepare_namespace(
machine: EvaluationMachine<(), CorpusReady>,
ctx: &mut EvaluationContext<'_>,
@@ -39,9 +40,9 @@ pub(crate) async fn prepare_namespace(
.to_string();
let namespace = ctx.namespace.clone();
let database = ctx.database.clone();
let embedding_provider = ctx.embedding_provider().clone();
let embedding_provider = ctx.embedding_provider()?.clone();
let corpus_handle = ctx.corpus_handle();
let corpus_handle = ctx.corpus_handle()?;
let base_manifest = &corpus_handle.manifest;
let manifest_for_seed =
if ctx.window_offset == 0 && ctx.window_length >= base_manifest.questions.len() {
@@ -60,10 +61,10 @@ pub(crate) async fn prepare_namespace(
let mut namespace_reused = false;
if !config.reseed_slice {
namespace_reused = {
let slice = ctx.slice();
let slice = ctx.slice()?;
can_reuse_namespace(
ctx.db(),
ctx.descriptor(),
ctx.db()?,
ctx.descriptor()?,
&namespace,
&database,
dataset.metadata.id.as_str(),
@@ -78,19 +79,19 @@ pub(crate) async fn prepare_namespace(
let mut namespace_seed_ms = None;
if !namespace_reused {
ctx.must_reapply_settings = true;
if let Err(err) = reset_namespace(ctx.db(), &namespace, &database).await {
if let Err(err) = reset_namespace(ctx.db()?, &namespace, &database).await {
warn!(
error = %err,
namespace,
database = %database,
"Failed to reset namespace before reseeding; continuing with existing data"
);
} else if let Err(err) = ctx.db().apply_migrations().await {
} else if let Err(err) = ctx.db()?.apply_migrations().await {
warn!(error = %err, "Failed to reapply migrations after namespace reset");
}
{
let slice = ctx.slice();
let slice = ctx.slice()?;
info!(
slice = slice.manifest.slice_id.as_str(),
window_offset = ctx.window_offset,
@@ -113,10 +114,10 @@ pub(crate) async fn prepare_namespace(
"Seeding ingestion corpus into SurrealDB"
);
}
let indexes_disabled = remove_all_indexes(ctx.db()).await.is_ok();
let indexes_disabled = remove_all_indexes(ctx.db()?).await.is_ok();
let seed_start = Instant::now();
corpus::seed_manifest_into_db(ctx.db(), &manifest_for_seed)
corpus::seed_manifest_into_db(ctx.db()?, &manifest_for_seed)
.await
.context("seeding ingestion corpus from manifest")?;
namespace_seed_ms = Some(seed_start.elapsed().as_millis());
@@ -124,15 +125,15 @@ pub(crate) async fn prepare_namespace(
// Recreate indexes AFTER data is loaded (correct bulk loading pattern)
if indexes_disabled {
info!("Recreating indexes after seeding data");
recreate_indexes(ctx.db(), embedding_provider.dimension())
recreate_indexes(ctx.db()?, embedding_provider.dimension())
.await
.context("recreating indexes with correct dimension")?;
warm_hnsw_cache(ctx.db(), embedding_provider.dimension()).await?;
warm_hnsw_cache(ctx.db()?, embedding_provider.dimension()).await?;
}
{
let slice = ctx.slice();
let slice = ctx.slice()?;
record_namespace_state(
ctx.descriptor(),
ctx.descriptor()?,
dataset.metadata.id.as_str(),
slice.manifest.slice_id.as_str(),
expected_fingerprint.as_str(),
@@ -145,17 +146,17 @@ pub(crate) async fn prepare_namespace(
}
if ctx.must_reapply_settings {
let mut settings = SystemSettings::get_current(ctx.db())
let mut settings = SystemSettings::get_current(ctx.db()?)
.await
.context("reloading system settings after namespace reset")?;
settings =
enforce_system_settings(ctx.db(), settings, embedding_provider.dimension(), config)
enforce_system_settings(ctx.db()?, settings, embedding_provider.dimension(), config)
.await?;
ctx.settings = Some(settings);
ctx.must_reapply_settings = false;
}
let user = ensure_eval_user(ctx.db()).await?;
let user = ensure_eval_user(ctx.db()?).await?;
ctx.eval_user = Some(user);
let total_manifest_questions = manifest_for_seed.questions.len();
@@ -199,5 +200,5 @@ pub(crate) async fn prepare_namespace(
machine
.prepare_namespace()
.map_err(|(_, guard)| map_guard_error("prepare_namespace", guard))
.map_err(|(_, guard)| map_guard_error("prepare_namespace", &guard))
}
@@ -68,5 +68,5 @@ pub(crate) async fn prepare_slice(
machine
.prepare_slice()
.map_err(|(_, guard)| map_guard_error("prepare_slice", guard))
.map_err(|(_, guard)| map_guard_error("prepare_slice", &guard))
}
+17 -14
View File
@@ -1,6 +1,6 @@
use std::{collections::HashSet, sync::Arc, time::Instant};
use anyhow::Context;
use anyhow::{anyhow, Context};
use common::storage::types::StoredObject;
use futures::stream::{self, StreamExt};
use tracing::{debug, info};
@@ -21,6 +21,7 @@ use super::super::{
};
use super::{map_guard_error, StageResult};
#[allow(clippy::too_many_lines, clippy::arithmetic_side_effects)]
pub(crate) async fn run_queries(
machine: EvaluationMachine<(), NamespaceReady>,
ctx: &mut EvaluationContext<'_>,
@@ -37,7 +38,7 @@ pub(crate) async fn run_queries(
let slice_settings = ctx
.slice_settings
.as_ref()
.expect("slice settings missing during query stage");
.ok_or_else(|| anyhow!("slice settings missing during query stage"))?;
let total_cases = ctx.cases.len();
let cases_iter = std::mem::take(&mut ctx.cases).into_iter().enumerate();
@@ -115,9 +116,9 @@ pub(crate) async fn run_queries(
chunk_rrf_fts_weight = active_tuning.chunk_rrf_fts_weight,
chunk_rrf_use_vector = active_tuning.flags.chunk_rrf_use_vector.as_bool(),
chunk_rrf_use_fts = active_tuning.flags.chunk_rrf_use_fts.as_bool(),
embedding_backend = ctx.embedding_provider().backend_label(),
embedding_backend = ctx.embedding_provider()?.backend_label(),
embedding_model = ctx
.embedding_provider()
.embedding_provider()?
.model_code()
.as_deref()
.unwrap_or("<default>"),
@@ -125,11 +126,11 @@ pub(crate) async fn run_queries(
);
let retrieval_config = Arc::new(retrieval_config);
ctx.rerank_pool = rerank_pool.clone();
ctx.retrieval_config = Some(retrieval_config.clone());
ctx.rerank_pool.clone_from(&rerank_pool);
ctx.retrieval_config = Some(Arc::clone(&retrieval_config));
ctx.evaluation_start = Some(Instant::now());
let user_id = ctx.evaluation_user().id.clone();
let user_id = ctx.evaluation_user()?.id.clone();
let concurrency = config.concurrency.max(1);
let diagnostics_enabled = ctx.diagnostics_enabled;
@@ -141,20 +142,20 @@ pub(crate) async fn run_queries(
"Starting evaluation with staged query execution"
);
let embedding_provider_for_queries = ctx.embedding_provider().clone();
let embedding_provider_for_queries = ctx.embedding_provider()?.clone();
let rerank_pool_for_queries = rerank_pool.clone();
let db = ctx.db().clone();
let openai_client = ctx.openai_client();
let db = ctx.db()?.clone();
let openai_client = ctx.openai_client()?;
let raw_results = stream::iter(cases_iter)
.map(move |(idx, case)| {
let db = db.clone();
let openai_client = openai_client.clone();
let openai_client = Arc::clone(&openai_client);
let user_id = user_id.clone();
let retrieval_config = retrieval_config.clone();
let retrieval_config = Arc::clone(&retrieval_config);
let embedding_provider = embedding_provider_for_queries.clone();
let rerank_pool = rerank_pool_for_queries.clone();
let semaphore = query_semaphore.clone();
let semaphore = Arc::clone(&query_semaphore);
let diagnostics_enabled = diagnostics_enabled;
async move {
@@ -374,9 +375,10 @@ pub(crate) async fn run_queries(
machine
.run_queries()
.map_err(|(_, guard)| map_guard_error("run_queries", guard))
.map_err(|(_, guard)| map_guard_error("run_queries", &guard))
}
#[allow(clippy::arithmetic_side_effects, clippy::cast_precision_loss)]
fn calculate_reciprocal_rank(rank: Option<usize>) -> f64 {
match rank {
Some(r) if r > 0 => 1.0 / (r as f64),
@@ -384,6 +386,7 @@ fn calculate_reciprocal_rank(rank: Option<usize>) -> f64 {
}
}
#[allow(clippy::arithmetic_side_effects, clippy::cast_precision_loss)]
fn calculate_ndcg(retrieved: &[RetrievedSummary], k: usize) -> f64 {
let mut dcg = 0.0;
let mut relevant_count = 0;
+8 -7
View File
@@ -13,6 +13,7 @@ use super::super::{
};
use super::{map_guard_error, StageResult};
#[allow(clippy::too_many_lines, clippy::arithmetic_side_effects, clippy::cast_precision_loss)]
pub(crate) async fn summarize(
machine: EvaluationMachine<(), QueriesFinished>,
ctx: &mut EvaluationContext<'_>,
@@ -34,8 +35,8 @@ pub(crate) async fn summarize(
.unwrap_or_default();
let config = ctx.config();
let dataset = ctx.dataset();
let slice = ctx.slice();
let corpus_handle = ctx.corpus_handle();
let slice = ctx.slice()?;
let corpus_handle = ctx.corpus_handle()?;
let total_cases = summaries.len();
let mut correct = 0usize;
@@ -176,7 +177,7 @@ pub(crate) async fn summarize(
slice_total_paragraphs: slice.manifest.total_paragraphs,
slice_negative_multiplier: slice.manifest.negative_multiplier,
namespace_reused: ctx.namespace_reused,
corpus_paragraphs: ctx.corpus_handle().manifest.metadata.paragraph_count,
corpus_paragraphs: ctx.corpus_handle()?.manifest.metadata.paragraph_count,
ingestion_cache_path: corpus_handle.path.display().to_string(),
ingestion_reused: corpus_handle.reused_ingestion,
ingestion_embeddings_reused: corpus_handle.reused_embeddings,
@@ -189,9 +190,9 @@ pub(crate) async fn summarize(
negative_paragraphs_reused: corpus_handle.negative_reused,
latency_ms: latency_stats,
perf: perf_timings,
embedding_backend: ctx.embedding_provider().backend_label().to_string(),
embedding_model: ctx.embedding_provider().model_code(),
embedding_dimension: ctx.embedding_provider().dimension(),
embedding_backend: ctx.embedding_provider()?.backend_label().to_string(),
embedding_model: ctx.embedding_provider()?.model_code(),
embedding_dimension: ctx.embedding_provider()?.dimension(),
rerank_enabled: config.retrieval.rerank,
rerank_pool_size: ctx
.rerank_pool
@@ -228,5 +229,5 @@ pub(crate) async fn summarize(
machine
.summarize()
.map_err(|(_, guard)| map_guard_error("summarize", guard))
.map_err(|(_, guard)| map_guard_error("summarize", &guard))
}