From 414d2f5b345bb2af293aeda6a2fa0f4f904bf4ac Mon Sep 17 00:00:00 2001 From: Per Stark Date: Wed, 27 May 2026 07:37:18 +0200 Subject: [PATCH] chore: additional clippy fixes after rebasing --- api-router/src/error.rs | 2 +- api-router/src/lib.rs | 4 +- api-router/src/routes/ingest.rs | 6 +- common/src/storage/store.rs | 2 +- common/src/storage/types/file_info.rs | 7 +- common/src/storage/types/knowledge_entity.rs | 10 +- common/src/storage/types/user.rs | 2 +- evaluations/src/args.rs | 3 + evaluations/src/cache.rs | 6 +- evaluations/src/cases.rs | 3 +- evaluations/src/corpus/orchestrator.rs | 36 +-- evaluations/src/corpus/store.rs | 25 +- evaluations/src/datasets/beir.rs | 18 +- evaluations/src/datasets/mod.rs | 18 +- evaluations/src/datasets/nq.rs | 1 + evaluations/src/db_helpers.rs | 4 +- evaluations/src/eval.rs | 4 +- evaluations/src/main.rs | 3 +- evaluations/src/namespace.rs | 2 +- evaluations/src/perf.rs | 1 + evaluations/src/pipeline/context.rs | 40 ++-- evaluations/src/pipeline/mod.rs | 2 +- evaluations/src/pipeline/stages/finalize.rs | 2 +- evaluations/src/pipeline/stages/mod.rs | 2 +- .../src/pipeline/stages/prepare_corpus.rs | 15 +- evaluations/src/pipeline/stages/prepare_db.rs | 2 +- .../src/pipeline/stages/prepare_namespace.rs | 37 +-- .../src/pipeline/stages/prepare_slice.rs | 2 +- .../src/pipeline/stages/run_queries.rs | 31 +-- evaluations/src/pipeline/stages/summarize.rs | 15 +- evaluations/src/report.rs | 217 ++++++------------ evaluations/src/settings.rs | 1 + evaluations/src/slice.rs | 53 +++-- evaluations/src/snapshot.rs | 2 + evaluations/src/types.rs | 12 + html-router/assets/style.css | 91 -------- .../src/middlewares/response_middleware.rs | 2 +- .../routes/chat/message_response_stream.rs | 12 +- main/src/main.rs | 28 ++- 39 files changed, 321 insertions(+), 402 deletions(-) diff --git a/api-router/src/error.rs b/api-router/src/error.rs index b7ca986..225ae5e 100644 --- a/api-router/src/error.rs +++ b/api-router/src/error.rs @@ -145,7 +145,7 
@@ mod tests { assert_status_code(error, StatusCode::UNAUTHORIZED); // Test payload too large status - let error = ApiError::PayloadTooLarge("too big".to_string()); + let error = ApiErr::PayloadTooLarge("too big".to_string()); assert_status_code(error, StatusCode::PAYLOAD_TOO_LARGE); } diff --git a/api-router/src/lib.rs b/api-router/src/lib.rs index b133814..df24676 100644 --- a/api-router/src/lib.rs +++ b/api-router/src/lib.rs @@ -6,7 +6,7 @@ use axum::{ Router, }; use middleware_api_auth::api_auth; -use routes::{categories::get_categories, ingest::ingest_data, liveness::live, readiness::ready}; +use routes::{categories::list, ingest::ingest_data, liveness::live, readiness::ready}; pub mod api_state; pub mod error; @@ -32,7 +32,7 @@ where app_state.config.ingest_max_body_bytes, )), ) - .route("/categories", get(get_categories)) + .route("/categories", get(list)) .route_layer(from_fn_with_state(app_state.clone(), api_auth)); public.merge(protected) diff --git a/api-router/src/routes/ingest.rs b/api-router/src/routes/ingest.rs index a68f9ca..3938952 100644 --- a/api-router/src/routes/ingest.rs +++ b/api-router/src/routes/ingest.rs @@ -29,7 +29,7 @@ pub async fn ingest_data( State(state): State, Extension(user): Extension, TypedMultipart(input): TypedMultipart, -) -> Result { +) -> Result { let user_id = user.id; let content_bytes = input.content.as_ref().map_or(0, |c| c.len()); let has_content = input.content.as_ref().is_some_and(|c| !c.trim().is_empty()); @@ -46,10 +46,10 @@ pub async fn ingest_data( ) { Ok(()) => {} Err(IngestValidationError::PayloadTooLarge(message)) => { - return Err(ApiError::PayloadTooLarge(message)); + return Err(ApiErr::PayloadTooLarge(message)); } Err(IngestValidationError::BadRequest(message)) => { - return Err(ApiError::ValidationError(message)); + return Err(ApiErr::ValidationError(message)); } } diff --git a/common/src/storage/store.rs b/common/src/storage/store.rs index 154b543..02b463d 100644 --- a/common/src/storage/store.rs +++ 
b/common/src/storage/store.rs @@ -367,7 +367,7 @@ pub mod testing { storage: StorageKind::S3, s3_bucket: Some(configured_test_s3_bucket()), s3_endpoint: Some(configured_test_s3_endpoint()), - s3_region: Some("us-east-1".into()), + s3_region: "us-east-1".into(), pdf_ingest_mode: PdfIngestMode::LlmFirst, ..Default::default() } diff --git a/common/src/storage/types/file_info.rs b/common/src/storage/types/file_info.rs index 5a7fab5..7765471 100644 --- a/common/src/storage/types/file_info.rs +++ b/common/src/storage/types/file_info.rs @@ -705,7 +705,7 @@ mod tests { let database = &Uuid::new_v4().to_string(); let db = SurrealDbClient::memory(namespace, database) .await - .with_context(|| "Failed to start in-memory surrealdb".to_string())?; + .expect("Failed to start in-memory surrealdb"); // Create a FileInfo instance directly let now = Utc::now(); @@ -728,14 +728,13 @@ mod tests { let retrieved = db .get_item::(&file_info.id) .await - .with_context(|| "Failed to retrieve file info".to_string())? 
- .with_context(|| "expected file".to_string())?; + .expect("Failed to retrieve file info") + .expect("expected file"); assert_eq!(retrieved.id, file_info.id); assert_eq!(retrieved.sha256, file_info.sha256); assert_eq!(retrieved.file_name, file_info.file_name); assert_eq!(retrieved.path, file_info.path); assert_eq!(retrieved.mime_type, file_info.mime_type); - Ok(()) } #[tokio::test] diff --git a/common/src/storage/types/knowledge_entity.rs b/common/src/storage/types/knowledge_entity.rs index 5c1c217..a1307b4 100644 --- a/common/src/storage/types/knowledge_entity.rs +++ b/common/src/storage/types/knowledge_entity.rs @@ -829,21 +829,19 @@ mod tests { let database = &Uuid::new_v4().to_string(); let db = SurrealDbClient::memory(namespace, database) .await - .with_context(|| "Failed to start in-memory surrealdb".to_string())?; + .expect("Failed to start in-memory surrealdb"); db.apply_migrations() .await - .with_context(|| "Failed to apply migrations".to_string())?; + .expect("Failed to apply migrations"); KnowledgeEntityEmbedding::redefine_hnsw_index(&db, 3) .await - .with_context(|| "Failed to redefine index length".to_string())?; + .expect("Failed to redefine index length"); let results = KnowledgeEntity::vector_search(5, vec![0.1, 0.2, 0.3], &db, "user") .await - .with_context(|| "vector search".to_string())?; + .expect("vector search"); assert!(results.is_empty()); - - Ok(()) } #[tokio::test] diff --git a/common/src/storage/types/user.rs b/common/src/storage/types/user.rs index 769d462..b5fb0c0 100644 --- a/common/src/storage/types/user.rs +++ b/common/src/storage/types/user.rs @@ -1036,7 +1036,7 @@ mod tests { #[tokio::test] async fn test_set_api_key_with_none_theme() { - let db = setup_test_db().await; + let db = setup_test_db().await.expect("Failed to setup test db"); let user = User::create_new( "legacy_theme@example.com".to_string(), diff --git a/evaluations/src/args.rs b/evaluations/src/args.rs index 2e385ff..ead1cfa 100644 --- a/evaluations/src/args.rs +++ 
b/evaluations/src/args.rs @@ -198,6 +198,7 @@ pub struct DatabaseArgs { #[derive(Parser, Debug, Clone)] #[command(author, version, about, long_about = None)] +#[allow(clippy::struct_excessive_bools)] pub struct Config { /// Convert the selected dataset and exit #[arg(long)] @@ -344,10 +345,12 @@ pub struct Config { } impl Config { + #[allow(clippy::unused_self)] pub fn context_token_limit(&self) -> Option { None } + #[allow(clippy::too_many_lines)] pub fn finalize(&mut self) -> Result<()> { // Handle dataset paths if let Some(raw) = &self.raw { diff --git a/evaluations/src/cache.rs b/evaluations/src/cache.rs index db31905..31a8594 100644 --- a/evaluations/src/cache.rs +++ b/evaluations/src/cache.rs @@ -1,6 +1,6 @@ use std::{ collections::HashMap, - path::{Path, PathBuf}, + path::Path, sync::{ atomic::{AtomicBool, Ordering}, Arc, @@ -19,7 +19,7 @@ struct EmbeddingCacheData { #[derive(Clone)] pub struct EmbeddingCache { - path: Arc, + path: Arc, data: Arc>, dirty: Arc, } @@ -39,7 +39,7 @@ impl EmbeddingCache { }; Ok(Self { - path: Arc::new(path), + path: Arc::from(path.as_path()), data: Arc::new(Mutex::new(data)), dirty: Arc::new(AtomicBool::new(false)), }) diff --git a/evaluations/src/cases.rs b/evaluations/src/cases.rs index e7c734f..1b20a39 100644 --- a/evaluations/src/cases.rs +++ b/evaluations/src/cases.rs @@ -163,6 +163,7 @@ mod tests { } #[test] + #[allow(clippy::indexing_slicing)] fn cases_respect_mode_filters() { let mut manifest = sample_manifest(); manifest.metadata.include_unanswerable = false; @@ -173,7 +174,7 @@ mod tests { assert_eq!(strict_cases[0].question_id, "q1"); assert_eq!(strict_cases[0].paragraph_title, "Alpha"); - let mut llm_manifest = manifest.clone(); + let mut llm_manifest = manifest; llm_manifest.metadata.include_unanswerable = true; llm_manifest.metadata.require_verified_chunks = false; diff --git a/evaluations/src/corpus/orchestrator.rs b/evaluations/src/corpus/orchestrator.rs index 9a56b19..29a149f 100644 --- 
a/evaluations/src/corpus/orchestrator.rs +++ b/evaluations/src/corpus/orchestrator.rs @@ -108,7 +108,15 @@ struct IngestionStats { negative_ingested: usize, } -#[allow(clippy::too_many_arguments)] +#[allow( + clippy::too_many_arguments, + clippy::too_many_lines, + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + clippy::cast_precision_loss, + clippy::arithmetic_side_effects, + clippy::indexing_slicing +)] pub async fn ensure_corpus( dataset: &ConvertedDataset, slice: &ResolvedSlice<'_>, @@ -167,7 +175,7 @@ pub async fn ensure_corpus( .paragraphs .get(idx) .copied() - .ok_or_else(|| anyhow!("slice missing paragraph index {}", idx))?; + .ok_or_else(|| anyhow!("slice missing paragraph index {idx}"))?; plan.push(ParagraphPlan { slot: plan.len(), entry, @@ -236,8 +244,8 @@ pub async fn ensure_corpus( let new_shards = ingest_paragraph_batch( dataset, &ingest_requests, - embedding.clone(), - openai.clone(), + Arc::clone(&embedding), + Arc::clone(&openai), user_id, &ingestion_fingerprint, &embedding_backend_label, @@ -265,10 +273,10 @@ pub async fn ensure_corpus( .context("shard record missing after ingestion run")?; if cache.refresh_embeddings_only || shard_record.needs_reembed { // Embeddings are now generated by the pipeline using FastEmbed - no need to re-embed - shard_record.shard.ingestion_fingerprint = ingestion_fingerprint.clone(); + shard_record.shard.ingestion_fingerprint.clone_from(&ingestion_fingerprint); shard_record.shard.ingested_at = Utc::now(); - shard_record.shard.embedding_backend = embedding_backend_label.clone(); - shard_record.shard.embedding_model = embedding_model_code.clone(); + shard_record.shard.embedding_backend.clone_from(&embedding_backend_label); + shard_record.shard.embedding_model.clone_from(&embedding_model_code); shard_record.shard.embedding_dimension = embedding_dimension; shard_record.dirty = true; shard_record.needs_reembed = false; @@ -282,7 +290,7 @@ pub async fn ensure_corpus( let mut corpus_paragraphs = 
Vec::with_capacity(plan.len()); for record in &records { - let shard = &record.as_ref().expect("record missing").shard; + let shard = &record.as_ref().context("record missing")?.shard; corpus_paragraphs.push(shard.to_corpus_paragraph()); } @@ -438,11 +446,11 @@ async fn ingest_paragraph_batch( let pipeline_config = ingestion_config.clone(); let pipeline = IngestionPipeline::new_with_config( db, - openai.clone(), + Arc::clone(&openai), app_config, None::>, storage, - embedding.clone(), + Arc::clone(&embedding), pipeline_config, )?; let pipeline = Arc::new(pipeline); @@ -458,11 +466,11 @@ async fn ingest_paragraph_batch( ); let model_clone = embedding_model.clone(); let backend_clone = embedding_backend.to_string(); - let pipeline_clone = pipeline.clone(); + let pipeline_clone = Arc::clone(&pipeline); let category_clone = category.clone(); let tasks = batch.iter().cloned().map(move |request| { ingest_single_paragraph( - pipeline_clone.clone(), + Arc::clone(&pipeline_clone), request, category_clone.clone(), user_id, @@ -684,6 +692,7 @@ pub fn corpus_handle_from_manifest(manifest: CorpusManifest, base_dir: PathBuf) } } +#[allow(clippy::indexing_slicing)] fn compute_file_checksum(path: &Path) -> Result { let mut file = fs::File::open(path) .with_context(|| format!("opening file {} for checksum", path.display()))?; @@ -736,7 +745,8 @@ mod tests { } } - fn dummy_slice<'a>(dataset: &'a ConvertedDataset) -> ResolvedSlice<'a> { + #[allow(clippy::too_many_lines, clippy::indexing_slicing)] + fn dummy_slice(dataset: &ConvertedDataset) -> ResolvedSlice<'_> { let paragraph = &dataset.paragraphs[0]; let question = &paragraph.questions[0]; let manifest = SliceManifest { diff --git a/evaluations/src/corpus/store.rs b/evaluations/src/corpus/store.rs index c415472..0c46c98 100644 --- a/evaluations/src/corpus/store.rs +++ b/evaluations/src/corpus/store.rs @@ -197,6 +197,13 @@ pub struct CorpusHandle { pub negative_ingested: usize, } +#[allow( + clippy::arithmetic_side_effects, + 
clippy::cast_possible_truncation, + clippy::cast_precision_loss, + clippy::cast_sign_loss, + clippy::indexing_slicing +)] pub fn window_manifest( manifest: &CorpusManifest, offset: usize, @@ -211,9 +218,7 @@ pub fn window_manifest( } if offset >= total { return Err(anyhow!( - "window offset {} exceeds manifest questions ({})", - offset, - total + "window offset {offset} exceeds manifest questions ({total})" )); } let end = (offset + length).min(total); @@ -601,6 +606,7 @@ fn normalize_answer_text(text: &str) -> String { .join(" ") } +#[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)] fn chunk_items( items: &[T], max_items: usize, @@ -644,6 +650,7 @@ fn chunk_items( Ok(batches) } +#[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)] async fn execute_batched_inserts( db: &SurrealDbClient, statement: impl AsRef, @@ -781,6 +788,7 @@ mod tests { use common::storage::types::knowledge_entity::KnowledgeEntityType; use uuid::Uuid; + #[allow(clippy::too_many_lines)] fn build_manifest() -> CorpusManifest { let user_id = "user-1".to_string(); let source_id = "source-1".to_string(); @@ -822,9 +830,9 @@ mod tests { id: Uuid::new_v4().to_string(), created_at: now, updated_at: now, - source_id: source_id.clone(), + source_id, chunk: "chunk text".to_string(), - user_id: user_id.clone(), + user_id, }; let paragraph_one = CorpusParagraph { @@ -846,9 +854,9 @@ mod tests { let paragraph_two = CorpusParagraph { paragraph_id: "p2".to_string(), title: "Paragraph 2".to_string(), - text_content: text_content.clone(), + text_content, entities: vec![EmbeddedKnowledgeEntity { - entity: entity.clone(), + entity, embedding: vec![0.1, 0.2, 0.3], }], relationships: Vec::new(), @@ -865,7 +873,7 @@ mod tests { question_text: "What is this?".to_string(), answers: vec!["Hello".to_string()], is_impossible: false, - matching_chunk_ids: vec![chunk.id.clone()], + matching_chunk_ids: vec![chunk.id], }; CorpusManifest { @@ -893,6 +901,7 @@ mod tests { } } + 
#[allow(clippy::indexing_slicing, clippy::expect_used)] #[test] fn window_manifest_trims_questions_and_negatives() { let manifest = build_manifest(); diff --git a/evaluations/src/datasets/beir.rs b/evaluations/src/datasets/beir.rs index 49c1714..13a5970 100644 --- a/evaluations/src/datasets/beir.rs +++ b/evaluations/src/datasets/beir.rs @@ -47,6 +47,7 @@ struct QrelEntry { score: i32, } +#[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)] pub fn convert_beir(raw_dir: &Path, dataset: DatasetKind) -> Result> { let corpus_path = raw_dir.join("corpus.jsonl"); let queries_path = raw_dir.join("queries.jsonl"); @@ -76,18 +77,17 @@ pub fn convert_beir(raw_dir: &Path, dataset: DatasetKind) -> Result entry, - None => continue, + let Some(best) = select_best_doc(&entries) else { + continue; }; - let paragraph_slot = if let Some(slot) = paragraph_index.get(&best.doc_id) { *slot } else { + let Some(&paragraph_slot) = paragraph_index.get(&best.doc_id) else { missing_docs += 1; warn!( query_id = %query_id, @@ -97,8 +97,7 @@ pub fn convert_beir(raw_dir: &Path, dataset: DatasetKind) -> Result Result Result { )) } +#[allow(clippy::arithmetic_side_effects)] fn load_corpus(path: &Path) -> Result> { let file = File::open(path).with_context(|| format!("opening BEIR corpus at {}", path.display()))?; @@ -181,6 +182,7 @@ fn load_corpus(path: &Path) -> Result> { Ok(corpus) } +#[allow(clippy::arithmetic_side_effects)] fn load_queries(path: &Path) -> Result> { let file = File::open(path) .with_context(|| format!("opening BEIR queries file at {}", path.display()))?; @@ -211,6 +213,7 @@ fn load_queries(path: &Path) -> Result> { Ok(queries) } +#[allow(clippy::arithmetic_side_effects)] fn load_qrels(path: &Path) -> Result>> { let file = File::open(path).with_context(|| format!("opening BEIR qrels at {}", path.display()))?; @@ -294,6 +297,7 @@ mod tests { use tempfile::tempdir; #[test] + #[allow(clippy::unwrap_used, clippy::expect_used, clippy::indexing_slicing)] fn 
converts_basic_beir_layout() { let dir = tempdir().unwrap(); let corpus = r#" diff --git a/evaluations/src/datasets/mod.rs b/evaluations/src/datasets/mod.rs index 62f8b72..1274c5e 100644 --- a/evaluations/src/datasets/mod.rs +++ b/evaluations/src/datasets/mod.rs @@ -305,7 +305,7 @@ impl DatasetKind { pub fn category(self) -> &'static str { match self { Self::SquadV2 => "SQuAD v2.0", - Self::NaturalQuestions => "Natural Questions", + Self::NaturalQuestions | Self::NqBeir => "Natural Questions", Self::Beir => "BEIR", Self::Fever => "FEVER", Self::Fiqa => "FiQA-2018", @@ -314,14 +314,13 @@ impl DatasetKind { Self::Quora => "Quora", Self::TrecCovid => "TREC-COVID", Self::Scifact => "SciFact", - Self::NqBeir => "Natural Questions", } } pub fn entity_suffix(self) -> &'static str { match self { Self::SquadV2 => "SQuAD", - Self::NaturalQuestions => "Natural Questions", + Self::NaturalQuestions | Self::NqBeir => "Natural Questions", Self::Beir => "BEIR", Self::Fever => "FEVER", Self::Fiqa => "FiQA", @@ -330,7 +329,6 @@ impl DatasetKind { Self::Quora => "Quora", Self::TrecCovid => "TREC-COVID", Self::Scifact => "SciFact", - Self::NqBeir => "Natural Questions", } } @@ -351,11 +349,19 @@ impl DatasetKind { } pub fn default_raw_path(self) -> PathBuf { - dataset_entry_for_kind(self).map_or_else(|err| panic!("dataset manifest missing entry for {self:?}: {err}"), |entry| entry.raw_path.clone()) + #[allow(clippy::panic)] + match dataset_entry_for_kind(self) { + Ok(entry) => entry.raw_path.clone(), + Err(err) => panic!("dataset manifest missing entry for {self:?}: {err}"), + } } pub fn default_converted_path(self) -> PathBuf { - dataset_entry_for_kind(self).map_or_else(|err| panic!("dataset manifest missing entry for {self:?}: {err}"), |entry| entry.converted_path.clone()) + #[allow(clippy::panic)] + match dataset_entry_for_kind(self) { + Ok(entry) => entry.converted_path.clone(), + Err(err) => panic!("dataset manifest missing entry for {self:?}: {err}"), + } } } diff --git 
a/evaluations/src/datasets/nq.rs b/evaluations/src/datasets/nq.rs index dde9f2b..3bb6444 100644 --- a/evaluations/src/datasets/nq.rs +++ b/evaluations/src/datasets/nq.rs @@ -11,6 +11,7 @@ use tracing::warn; use super::{ConvertedParagraph, ConvertedQuestion}; +#[allow(clippy::too_many_lines, clippy::arithmetic_side_effects, clippy::cast_sign_loss)] pub fn convert_nq( raw_path: &Path, include_unanswerable: bool, diff --git a/evaluations/src/db_helpers.rs b/evaluations/src/db_helpers.rs index 02e7bc7..e154b73 100644 --- a/evaluations/src/db_helpers.rs +++ b/evaluations/src/db_helpers.rs @@ -55,6 +55,7 @@ mod tests { } #[tokio::test] + #[allow(clippy::expect_used, clippy::unwrap_used, clippy::indexing_slicing)] async fn reset_namespace_drops_existing_rows() { let namespace = format!("reset_ns_{}", Uuid::new_v4().simple()); let database = format!("reset_db_{}", Uuid::new_v4().simple()); @@ -90,8 +91,7 @@ mod tests { let rows: Vec = response.take(0).unwrap_or_default(); assert!( rows.is_empty(), - "reset namespace should drop rows, found {:?}", - rows + "reset namespace should drop rows, found {rows:?}", ); } Err(error) => { diff --git a/evaluations/src/eval.rs b/evaluations/src/eval.rs index e16fdbd..e0abf4b 100644 --- a/evaluations/src/eval.rs +++ b/evaluations/src/eval.rs @@ -25,7 +25,7 @@ use crate::{ }; /// Grow the slice ledger to contain the target number of cases. 
-pub async fn grow_slice(dataset: &ConvertedDataset, config: &Config) -> Result<()> { +pub fn grow_slice(dataset: &ConvertedDataset, config: &Config) -> Result<()> { let ledger_limit = ledger_target(config); let slice_settings = slice::slice_config_with_limit(config, ledger_limit); let slice = @@ -70,8 +70,8 @@ pub(crate) async fn write_chunk_diagnostics(path: &Path, cases: &[CaseDiagnostic Ok(()) } +#[allow(clippy::cast_precision_loss)] pub(crate) async fn warm_hnsw_cache(db: &SurrealDbClient, dimension: usize) -> Result<()> { - // Create a dummy embedding for cache warming let dummy_embedding: Vec = (0..dimension).map(|i| (i as f32).sin()).collect(); info!("Warming HNSW caches with sample queries"); diff --git a/evaluations/src/main.rs b/evaluations/src/main.rs index eb6a1d8..81c8b94 100644 --- a/evaluations/src/main.rs +++ b/evaluations/src/main.rs @@ -22,6 +22,7 @@ use tracing::info; use tracing_subscriber::{fmt, EnvFilter}; /// Configure `SurrealDB` environment variables for optimal performance +#[allow(clippy::arithmetic_side_effects, clippy::unwrap_used)] fn configure_surrealdb_performance(cpu_count: usize) { // Set environment variables only if they're not already set let indexing_batch_size = std::env::var("SURREAL_INDEXING_BATCH_SIZE") @@ -74,6 +75,7 @@ fn main() -> anyhow::Result<()> { runtime.block_on(async_main()) } +#[allow(clippy::too_many_lines)] async fn async_main() -> anyhow::Result<()> { // Log runtime configuration let cpu_count = std::thread::available_parallelism()?.get(); @@ -165,7 +167,6 @@ async fn async_main() -> anyhow::Result<()> { if parsed.config.slice_grow.is_some() { eval::grow_slice(&dataset, &parsed.config) - .await .context("growing slice ledger")?; return Ok(()); } diff --git a/evaluations/src/namespace.rs b/evaluations/src/namespace.rs index a696a24..aa673e6 100644 --- a/evaluations/src/namespace.rs +++ b/evaluations/src/namespace.rs @@ -101,7 +101,7 @@ pub(crate) async fn can_reuse_namespace( ingestion_fingerprint: &str, 
slice_case_count: usize, ) -> Result { - let state = if let Some(state) = descriptor.load_db_state().await? { state } else { + let Some(state) = descriptor.load_db_state().await? else { info!("No namespace state recorded; reseeding corpus from cached shards"); return Ok(false); }; diff --git a/evaluations/src/perf.rs b/evaluations/src/perf.rs index 21bd66c..cc38935 100644 --- a/evaluations/src/perf.rs +++ b/evaluations/src/perf.rs @@ -213,6 +213,7 @@ mod tests { } #[test] + #[allow(clippy::unwrap_used, clippy::expect_used)] fn writes_perf_mirrors_from_record() { let tmp = tempdir().unwrap(); let report_root = tmp.path().join("reports"); diff --git a/evaluations/src/pipeline/context.rs b/evaluations/src/pipeline/context.rs index 0e12c45..684f573 100644 --- a/evaluations/src/pipeline/context.rs +++ b/evaluations/src/pipeline/context.rs @@ -4,6 +4,7 @@ use std::{ time::{Duration, Instant}, }; +use anyhow::{anyhow, Result}; use async_openai::Client; use common::{ storage::{ @@ -26,6 +27,7 @@ use crate::{ slice, snapshot, }; +#[allow(clippy::struct_excessive_bools)] pub(super) struct EvaluationContext<'a> { dataset: &'a ConvertedDataset, config: &'a Config, @@ -119,41 +121,39 @@ impl<'a> EvaluationContext<'a> { self.config } - pub fn slice(&self) -> &slice::ResolvedSlice<'a> { - self.slice.as_ref().expect("slice has not been prepared") + pub fn slice(&self) -> Result<&slice::ResolvedSlice<'a>> { + self.slice.as_ref().ok_or_else(|| anyhow!("slice has not been prepared")) } - pub fn db(&self) -> &SurrealDbClient { - self.db.as_ref().expect("database connection missing") + pub fn db(&self) -> Result<&SurrealDbClient> { + self.db.as_ref().ok_or_else(|| anyhow!("database connection missing")) } - pub fn descriptor(&self) -> &snapshot::Descriptor { + pub fn descriptor(&self) -> Result<&snapshot::Descriptor> { self.descriptor .as_ref() - .expect("snapshot descriptor unavailable") + .ok_or_else(|| anyhow!("snapshot descriptor unavailable")) } - pub fn embedding_provider(&self) 
-> &EmbeddingProvider { + pub fn embedding_provider(&self) -> Result<&EmbeddingProvider> { self.embedding_provider .as_ref() - .expect("embedding provider not initialised") + .ok_or_else(|| anyhow!("embedding provider not initialised")) } - pub fn openai_client(&self) -> Arc> { - self.openai_client - .as_ref() - .expect("openai client missing") - .clone() + pub fn openai_client(&self) -> Result>> { + Ok(Arc::clone(self.openai_client.as_ref().ok_or_else(|| anyhow!("openai client missing"))?)) } - pub fn corpus_handle(&self) -> &corpus::CorpusHandle { - self.corpus_handle.as_ref().expect("corpus handle missing") + pub fn corpus_handle(&self) -> Result<&corpus::CorpusHandle> { + self.corpus_handle.as_ref().ok_or_else(|| anyhow!("corpus handle missing")) } - pub fn evaluation_user(&self) -> &User { - self.eval_user.as_ref().expect("evaluation user missing") + pub fn evaluation_user(&self) -> Result<&User> { + self.eval_user.as_ref().ok_or_else(|| anyhow!("evaluation user missing")) } + #[allow(clippy::arithmetic_side_effects)] pub fn record_stage_duration(&mut self, stage: EvalStage, duration: Duration) { let elapsed = duration.as_millis(); match stage { @@ -167,8 +167,8 @@ impl<'a> EvaluationContext<'a> { } } - pub fn into_summary(self) -> EvaluationSummary { - self.summary.expect("evaluation summary missing") + pub fn into_summary(self) -> Result { + self.summary.ok_or_else(|| anyhow!("evaluation summary missing")) } } @@ -184,7 +184,7 @@ pub(super) enum EvalStage { } impl EvalStage { - pub fn label(&self) -> &'static str { + pub fn label(self) -> &'static str { match self { EvalStage::PrepareSlice => "prepare-slice", EvalStage::PrepareDb => "prepare-db", diff --git a/evaluations/src/pipeline/mod.rs b/evaluations/src/pipeline/mod.rs index 6980557..d8386b5 100644 --- a/evaluations/src/pipeline/mod.rs +++ b/evaluations/src/pipeline/mod.rs @@ -23,5 +23,5 @@ pub async fn run_evaluation( let machine = stages::summarize(machine, &mut ctx).await?; let _ = 
stages::finalize(machine, &mut ctx).await?; - Ok(ctx.into_summary()) + ctx.into_summary() } diff --git a/evaluations/src/pipeline/stages/finalize.rs b/evaluations/src/pipeline/stages/finalize.rs index e706346..b54708b 100644 --- a/evaluations/src/pipeline/stages/finalize.rs +++ b/evaluations/src/pipeline/stages/finalize.rs @@ -55,5 +55,5 @@ pub(crate) async fn finalize( machine .finalize() - .map_err(|(_, guard)| map_guard_error("finalize", guard)) + .map_err(|(_, guard)| map_guard_error("finalize", &guard)) } diff --git a/evaluations/src/pipeline/stages/mod.rs b/evaluations/src/pipeline/stages/mod.rs index 2fb0187..356b532 100644 --- a/evaluations/src/pipeline/stages/mod.rs +++ b/evaluations/src/pipeline/stages/mod.rs @@ -19,7 +19,7 @@ use state_machines::core::GuardError; use super::state::EvaluationMachine; -fn map_guard_error(event: &str, guard: GuardError) -> anyhow::Error { +fn map_guard_error(event: &str, guard: &GuardError) -> anyhow::Error { anyhow::anyhow!("invalid evaluation pipeline transition during {event}: {guard:?}") } diff --git a/evaluations/src/pipeline/stages/prepare_corpus.rs b/evaluations/src/pipeline/stages/prepare_corpus.rs index fdf7f5e..a8a16f1 100644 --- a/evaluations/src/pipeline/stages/prepare_corpus.rs +++ b/evaluations/src/pipeline/stages/prepare_corpus.rs @@ -11,6 +11,7 @@ use super::super::{ }; use super::{map_guard_error, StageResult}; +#[allow(clippy::too_many_lines)] pub(crate) async fn prepare_corpus( machine: EvaluationMachine<(), DbReady>, ctx: &mut EvaluationContext<'_>, @@ -24,13 +25,13 @@ pub(crate) async fn prepare_corpus( let config = ctx.config(); let cache_settings = corpus::CorpusCacheConfig::from(config); - let embedding_provider = ctx.embedding_provider().clone(); - let openai_client = ctx.openai_client(); - let slice = ctx.slice(); + let embedding_provider = ctx.embedding_provider()?.clone(); + let openai_client = ctx.openai_client()?; + let slice = ctx.slice()?; let window = slice::select_window(slice, 
ctx.config().slice_offset, ctx.config().limit) .context("selecting slice window for corpus preparation")?; - let descriptor = snapshot::Descriptor::new(config, slice, ctx.embedding_provider()); + let descriptor = snapshot::Descriptor::new(config, slice, ctx.embedding_provider()?); let ingestion_config = corpus::make_ingestion_config(config); let expected_fingerprint = corpus::compute_ingestion_fingerprint( ctx.dataset(), @@ -47,7 +48,7 @@ pub(crate) async fn prepare_corpus( if !config.reseed_slice { let requested_cases = window.cases.len(); if can_reuse_namespace( - ctx.db(), + ctx.db()?, &descriptor, &ctx.namespace, &ctx.database, @@ -81,7 +82,7 @@ pub(crate) async fn prepare_corpus( return machine .prepare_corpus() - .map_err(|(_, guard)| map_guard_error("prepare_corpus", guard)); + .map_err(|(_, guard)| map_guard_error("prepare_corpus", &guard)); } info!( cache = %base_dir.display(), @@ -137,5 +138,5 @@ pub(crate) async fn prepare_corpus( machine .prepare_corpus() - .map_err(|(_, guard)| map_guard_error("prepare_corpus", guard)) + .map_err(|(_, guard)| map_guard_error("prepare_corpus", &guard)) } diff --git a/evaluations/src/pipeline/stages/prepare_db.rs b/evaluations/src/pipeline/stages/prepare_db.rs index a8666e9..89990b3 100644 --- a/evaluations/src/pipeline/stages/prepare_db.rs +++ b/evaluations/src/pipeline/stages/prepare_db.rs @@ -117,5 +117,5 @@ pub(crate) async fn prepare_db( machine .prepare_db() - .map_err(|(_, guard)| map_guard_error("prepare_db", guard)) + .map_err(|(_, guard)| map_guard_error("prepare_db", &guard)) } diff --git a/evaluations/src/pipeline/stages/prepare_namespace.rs b/evaluations/src/pipeline/stages/prepare_namespace.rs index 8ea7103..1af78d0 100644 --- a/evaluations/src/pipeline/stages/prepare_namespace.rs +++ b/evaluations/src/pipeline/stages/prepare_namespace.rs @@ -19,6 +19,7 @@ use super::super::{ }; use super::{map_guard_error, StageResult}; +#[allow(clippy::too_many_lines)] pub(crate) async fn prepare_namespace( machine: 
EvaluationMachine<(), CorpusReady>, ctx: &mut EvaluationContext<'_>, @@ -39,9 +40,9 @@ pub(crate) async fn prepare_namespace( .to_string(); let namespace = ctx.namespace.clone(); let database = ctx.database.clone(); - let embedding_provider = ctx.embedding_provider().clone(); + let embedding_provider = ctx.embedding_provider()?.clone(); - let corpus_handle = ctx.corpus_handle(); + let corpus_handle = ctx.corpus_handle()?; let base_manifest = &corpus_handle.manifest; let manifest_for_seed = if ctx.window_offset == 0 && ctx.window_length >= base_manifest.questions.len() { @@ -60,10 +61,10 @@ pub(crate) async fn prepare_namespace( let mut namespace_reused = false; if !config.reseed_slice { namespace_reused = { - let slice = ctx.slice(); + let slice = ctx.slice()?; can_reuse_namespace( - ctx.db(), - ctx.descriptor(), + ctx.db()?, + ctx.descriptor()?, &namespace, &database, dataset.metadata.id.as_str(), @@ -78,19 +79,19 @@ pub(crate) async fn prepare_namespace( let mut namespace_seed_ms = None; if !namespace_reused { ctx.must_reapply_settings = true; - if let Err(err) = reset_namespace(ctx.db(), &namespace, &database).await { + if let Err(err) = reset_namespace(ctx.db()?, &namespace, &database).await { warn!( error = %err, namespace, database = %database, "Failed to reset namespace before reseeding; continuing with existing data" ); - } else if let Err(err) = ctx.db().apply_migrations().await { + } else if let Err(err) = ctx.db()?.apply_migrations().await { warn!(error = %err, "Failed to reapply migrations after namespace reset"); } { - let slice = ctx.slice(); + let slice = ctx.slice()?; info!( slice = slice.manifest.slice_id.as_str(), window_offset = ctx.window_offset, @@ -113,10 +114,10 @@ pub(crate) async fn prepare_namespace( "Seeding ingestion corpus into SurrealDB" ); } - let indexes_disabled = remove_all_indexes(ctx.db()).await.is_ok(); + let indexes_disabled = remove_all_indexes(ctx.db()?).await.is_ok(); let seed_start = Instant::now(); - 
corpus::seed_manifest_into_db(ctx.db(), &manifest_for_seed) + corpus::seed_manifest_into_db(ctx.db()?, &manifest_for_seed) .await .context("seeding ingestion corpus from manifest")?; namespace_seed_ms = Some(seed_start.elapsed().as_millis()); @@ -124,15 +125,15 @@ pub(crate) async fn prepare_namespace( // Recreate indexes AFTER data is loaded (correct bulk loading pattern) if indexes_disabled { info!("Recreating indexes after seeding data"); - recreate_indexes(ctx.db(), embedding_provider.dimension()) + recreate_indexes(ctx.db()?, embedding_provider.dimension()) .await .context("recreating indexes with correct dimension")?; - warm_hnsw_cache(ctx.db(), embedding_provider.dimension()).await?; + warm_hnsw_cache(ctx.db()?, embedding_provider.dimension()).await?; } { - let slice = ctx.slice(); + let slice = ctx.slice()?; record_namespace_state( - ctx.descriptor(), + ctx.descriptor()?, dataset.metadata.id.as_str(), slice.manifest.slice_id.as_str(), expected_fingerprint.as_str(), @@ -145,17 +146,17 @@ pub(crate) async fn prepare_namespace( } if ctx.must_reapply_settings { - let mut settings = SystemSettings::get_current(ctx.db()) + let mut settings = SystemSettings::get_current(ctx.db()?) 
.await .context("reloading system settings after namespace reset")?; settings = - enforce_system_settings(ctx.db(), settings, embedding_provider.dimension(), config) + enforce_system_settings(ctx.db()?, settings, embedding_provider.dimension(), config) .await?; ctx.settings = Some(settings); ctx.must_reapply_settings = false; } - let user = ensure_eval_user(ctx.db()).await?; + let user = ensure_eval_user(ctx.db()?).await?; ctx.eval_user = Some(user); let total_manifest_questions = manifest_for_seed.questions.len(); @@ -199,5 +200,5 @@ pub(crate) async fn prepare_namespace( machine .prepare_namespace() - .map_err(|(_, guard)| map_guard_error("prepare_namespace", guard)) + .map_err(|(_, guard)| map_guard_error("prepare_namespace", &guard)) } diff --git a/evaluations/src/pipeline/stages/prepare_slice.rs b/evaluations/src/pipeline/stages/prepare_slice.rs index f5909bf..861c1c6 100644 --- a/evaluations/src/pipeline/stages/prepare_slice.rs +++ b/evaluations/src/pipeline/stages/prepare_slice.rs @@ -68,5 +68,5 @@ pub(crate) async fn prepare_slice( machine .prepare_slice() - .map_err(|(_, guard)| map_guard_error("prepare_slice", guard)) + .map_err(|(_, guard)| map_guard_error("prepare_slice", &guard)) } diff --git a/evaluations/src/pipeline/stages/run_queries.rs b/evaluations/src/pipeline/stages/run_queries.rs index b3e5887..be79df3 100644 --- a/evaluations/src/pipeline/stages/run_queries.rs +++ b/evaluations/src/pipeline/stages/run_queries.rs @@ -1,6 +1,6 @@ use std::{collections::HashSet, sync::Arc, time::Instant}; -use anyhow::Context; +use anyhow::{anyhow, Context}; use common::storage::types::StoredObject; use futures::stream::{self, StreamExt}; use tracing::{debug, info}; @@ -21,6 +21,7 @@ use super::super::{ }; use super::{map_guard_error, StageResult}; +#[allow(clippy::too_many_lines, clippy::arithmetic_side_effects)] pub(crate) async fn run_queries( machine: EvaluationMachine<(), NamespaceReady>, ctx: &mut EvaluationContext<'_>, @@ -37,7 +38,7 @@ pub(crate) async 
fn run_queries( let slice_settings = ctx .slice_settings .as_ref() - .expect("slice settings missing during query stage"); + .ok_or_else(|| anyhow!("slice settings missing during query stage"))?; let total_cases = ctx.cases.len(); let cases_iter = std::mem::take(&mut ctx.cases).into_iter().enumerate(); @@ -115,9 +116,9 @@ pub(crate) async fn run_queries( chunk_rrf_fts_weight = active_tuning.chunk_rrf_fts_weight, chunk_rrf_use_vector = active_tuning.flags.chunk_rrf_use_vector.as_bool(), chunk_rrf_use_fts = active_tuning.flags.chunk_rrf_use_fts.as_bool(), - embedding_backend = ctx.embedding_provider().backend_label(), + embedding_backend = ctx.embedding_provider()?.backend_label(), embedding_model = ctx - .embedding_provider() + .embedding_provider()? .model_code() .as_deref() .unwrap_or(""), @@ -125,11 +126,11 @@ pub(crate) async fn run_queries( ); let retrieval_config = Arc::new(retrieval_config); - ctx.rerank_pool = rerank_pool.clone(); - ctx.retrieval_config = Some(retrieval_config.clone()); + ctx.rerank_pool.clone_from(&rerank_pool); + ctx.retrieval_config = Some(Arc::clone(&retrieval_config)); ctx.evaluation_start = Some(Instant::now()); - let user_id = ctx.evaluation_user().id.clone(); + let user_id = ctx.evaluation_user()?.id.clone(); let concurrency = config.concurrency.max(1); let diagnostics_enabled = ctx.diagnostics_enabled; @@ -141,20 +142,20 @@ pub(crate) async fn run_queries( "Starting evaluation with staged query execution" ); - let embedding_provider_for_queries = ctx.embedding_provider().clone(); + let embedding_provider_for_queries = ctx.embedding_provider()?.clone(); let rerank_pool_for_queries = rerank_pool.clone(); - let db = ctx.db().clone(); - let openai_client = ctx.openai_client(); + let db = ctx.db()?.clone(); + let openai_client = ctx.openai_client()?; let raw_results = stream::iter(cases_iter) .map(move |(idx, case)| { let db = db.clone(); - let openai_client = openai_client.clone(); + let openai_client = Arc::clone(&openai_client); let 
user_id = user_id.clone(); - let retrieval_config = retrieval_config.clone(); + let retrieval_config = Arc::clone(&retrieval_config); let embedding_provider = embedding_provider_for_queries.clone(); let rerank_pool = rerank_pool_for_queries.clone(); - let semaphore = query_semaphore.clone(); + let semaphore = Arc::clone(&query_semaphore); let diagnostics_enabled = diagnostics_enabled; async move { @@ -374,9 +375,10 @@ pub(crate) async fn run_queries( machine .run_queries() - .map_err(|(_, guard)| map_guard_error("run_queries", guard)) + .map_err(|(_, guard)| map_guard_error("run_queries", &guard)) } +#[allow(clippy::arithmetic_side_effects, clippy::cast_precision_loss)] fn calculate_reciprocal_rank(rank: Option) -> f64 { match rank { Some(r) if r > 0 => 1.0 / (r as f64), @@ -384,6 +386,7 @@ fn calculate_reciprocal_rank(rank: Option) -> f64 { } } +#[allow(clippy::arithmetic_side_effects, clippy::cast_precision_loss)] fn calculate_ndcg(retrieved: &[RetrievedSummary], k: usize) -> f64 { let mut dcg = 0.0; let mut relevant_count = 0; diff --git a/evaluations/src/pipeline/stages/summarize.rs b/evaluations/src/pipeline/stages/summarize.rs index a17d64f..c7391d4 100644 --- a/evaluations/src/pipeline/stages/summarize.rs +++ b/evaluations/src/pipeline/stages/summarize.rs @@ -13,6 +13,7 @@ use super::super::{ }; use super::{map_guard_error, StageResult}; +#[allow(clippy::too_many_lines, clippy::arithmetic_side_effects, clippy::cast_precision_loss)] pub(crate) async fn summarize( machine: EvaluationMachine<(), QueriesFinished>, ctx: &mut EvaluationContext<'_>, @@ -34,8 +35,8 @@ pub(crate) async fn summarize( .unwrap_or_default(); let config = ctx.config(); let dataset = ctx.dataset(); - let slice = ctx.slice(); - let corpus_handle = ctx.corpus_handle(); + let slice = ctx.slice()?; + let corpus_handle = ctx.corpus_handle()?; let total_cases = summaries.len(); let mut correct = 0usize; @@ -176,7 +177,7 @@ pub(crate) async fn summarize( slice_total_paragraphs: 
slice.manifest.total_paragraphs, slice_negative_multiplier: slice.manifest.negative_multiplier, namespace_reused: ctx.namespace_reused, - corpus_paragraphs: ctx.corpus_handle().manifest.metadata.paragraph_count, + corpus_paragraphs: ctx.corpus_handle()?.manifest.metadata.paragraph_count, ingestion_cache_path: corpus_handle.path.display().to_string(), ingestion_reused: corpus_handle.reused_ingestion, ingestion_embeddings_reused: corpus_handle.reused_embeddings, @@ -189,9 +190,9 @@ pub(crate) async fn summarize( negative_paragraphs_reused: corpus_handle.negative_reused, latency_ms: latency_stats, perf: perf_timings, - embedding_backend: ctx.embedding_provider().backend_label().to_string(), - embedding_model: ctx.embedding_provider().model_code(), - embedding_dimension: ctx.embedding_provider().dimension(), + embedding_backend: ctx.embedding_provider()?.backend_label().to_string(), + embedding_model: ctx.embedding_provider()?.model_code(), + embedding_dimension: ctx.embedding_provider()?.dimension(), rerank_enabled: config.retrieval.rerank, rerank_pool_size: ctx .rerank_pool @@ -228,5 +229,5 @@ pub(crate) async fn summarize( machine .summarize() - .map_err(|(_, guard)| map_guard_error("summarize", guard)) + .map_err(|(_, guard)| map_guard_error("summarize", &guard)) } diff --git a/evaluations/src/report.rs b/evaluations/src/report.rs index ef637f1..d4608f3 100644 --- a/evaluations/src/report.rs +++ b/evaluations/src/report.rs @@ -1,4 +1,5 @@ use std::{ + fmt::Write, fs, path::{Path, PathBuf}, }; @@ -71,6 +72,7 @@ pub struct SliceSection { } #[derive(Debug, Clone, Serialize, Deserialize)] +#[allow(clippy::struct_excessive_bools)] pub struct RetrievalSection { pub k: usize, pub cases: usize, @@ -179,6 +181,7 @@ pub struct ReportOutcome { } impl EvaluationReport { + #[allow(clippy::too_many_lines)] pub fn from_summary(summary: &EvaluationSummary, sample: usize) -> Self { let overview = OverviewSection { generated_at: format_timestamp(&summary.generated_at), @@ -400,21 
+403,17 @@ pub fn write_reports( }) } +#[allow(clippy::too_many_lines, clippy::write_with_newline, clippy::unwrap_used)] fn render_markdown(report: &EvaluationReport) -> String { let mut md = String::new(); - md.push_str(&format!( - "# Retrieval Evaluation (k={})\\n\\n", - report.retrieval.k - )); + write!(md, "# Retrieval Evaluation (k={})\\n\\n", report.retrieval.k).unwrap(); md.push_str("## Overview\\n\\n"); md.push_str("| Metric | Value |\\n| --- | --- |\\n"); - md.push_str(&format!( - "| Generated | {} |\\n", - report.overview.generated_at - )); - md.push_str(&format!( + write!(md, "| Generated | {} |\\n", report.overview.generated_at).unwrap(); + write!( + md, "| Run Label | {} |\\n", report .overview @@ -422,109 +421,55 @@ fn render_markdown(report: &EvaluationReport) -> String { .as_deref() .filter(|label| !label.is_empty()) .unwrap_or("-") - )); - md.push_str(&format!( - "| Total Cases | {} |\\n", - report.overview.total_cases - )); - md.push_str(&format!( - "| Filtered Questions | {} |\\n", - report.overview.filtered_questions - )); + ) + .unwrap(); + write!(md, "| Total Cases | {} |\\n", report.overview.total_cases).unwrap(); + write!(md, "| Filtered Questions | {} |\\n", report.overview.filtered_questions).unwrap(); md.push_str("\\n## Dataset & Slice\\n\\n"); md.push_str("| Metric | Value |\\n| --- | --- |\\n"); - md.push_str(&format!( - "| Dataset | {} (`{}`) |\\n", - report.dataset.label, report.dataset.id - )); - md.push_str(&format!( - "| Dataset Source | {} |\\n", - report.dataset.source - )); - md.push_str(&format!( - "| Includes Unanswerable | {} |\\n", - bool_badge(report.dataset.includes_unanswerable) - )); - md.push_str(&format!( - "| Require Verified Chunks | {} |\\n", - bool_badge(report.dataset.require_verified_chunks) - )); + write!(md, "| Dataset | {} (`{}`) |\\n", report.dataset.label, report.dataset.id).unwrap(); + write!(md, "| Dataset Source | {} |\\n", report.dataset.source).unwrap(); + write!(md, "| Includes Unanswerable | {} |\\n", 
bool_badge(report.dataset.includes_unanswerable)).unwrap(); + write!(md, "| Require Verified Chunks | {} |\\n", bool_badge(report.dataset.require_verified_chunks)).unwrap(); let embedding_label = if let Some(model) = report.dataset.embedding_model.as_ref() { format!("{} ({model})", report.dataset.embedding_backend) } else { report.dataset.embedding_backend.clone() }; - md.push_str(&format!("| Embedding | {embedding_label} |\\n")); - md.push_str(&format!( - "| Embedding Dim | {} |\\n", - report.dataset.embedding_dimension - )); - md.push_str(&format!("| Slice ID | `{}` |\\n", report.slice.id)); - md.push_str(&format!("| Slice Seed | {} |\\n", report.slice.seed)); - md.push_str(&format!( - "| Slice Window (offset/length) | {}/{} |\\n", - report.slice.window_offset, report.slice.window_length - )); - md.push_str(&format!( - "| Slice Questions (window/ledger) | {}/{} |\\n", - report.slice.slice_cases, report.slice.ledger_total_cases - )); - md.push_str(&format!( - "| Slice Positives / Negatives | {}/{} |\\n", - report.slice.positives, report.slice.negatives - )); - md.push_str(&format!( - "| Slice Paragraphs | {} |\\n", - report.slice.total_paragraphs - )); - md.push_str(&format!( - "| Negative Multiplier | {:.2} |\\n", - report.slice.negative_multiplier - )); + write!(md, "| Embedding | {embedding_label} |\\n").unwrap(); + write!(md, "| Embedding Dim | {} |\\n", report.dataset.embedding_dimension).unwrap(); + write!(md, "| Slice ID | `{}` |\\n", report.slice.id).unwrap(); + write!(md, "| Slice Seed | {} |\\n", report.slice.seed).unwrap(); + write!(md, "| Slice Window (offset/length) | {}/{} |\\n", report.slice.window_offset, report.slice.window_length).unwrap(); + write!(md, "| Slice Questions (window/ledger) | {}/{} |\\n", report.slice.slice_cases, report.slice.ledger_total_cases).unwrap(); + write!(md, "| Slice Positives / Negatives | {}/{} |\\n", report.slice.positives, report.slice.negatives).unwrap(); + write!(md, "| Slice Paragraphs | {} |\\n", 
report.slice.total_paragraphs).unwrap(); + write!(md, "| Negative Multiplier | {:.2} |\\n", report.slice.negative_multiplier).unwrap(); md.push_str("\\n## Retrieval Metrics\\n\\n"); md.push_str("| Metric | Value |\\n| --- | --- |\\n"); - md.push_str(&format!("| Cases | {} |\\n", report.retrieval.cases)); - md.push_str(&format!( - "| Correct@{} | {}/{} |\\n", - report.retrieval.k, report.retrieval.correct, report.retrieval.cases - )); - md.push_str(&format!( - "| Precision@{} | {:.3} |\\n", - report.retrieval.k, report.retrieval.precision - )); - md.push_str(&format!( + write!(md, "| Cases | {} |\\n", report.retrieval.cases).unwrap(); + write!(md, "| Correct@{} | {}/{} |\\n", report.retrieval.k, report.retrieval.correct, report.retrieval.cases).unwrap(); + write!(md, "| Precision@{} | {:.3} |\\n", report.retrieval.k, report.retrieval.precision).unwrap(); + write!( + md, "| Precision@1/2/3 | {:.3} / {:.3} / {:.3} |\\n", report.retrieval.precision_at_1, report.retrieval.precision_at_2, report.retrieval.precision_at_3 - )); - md.push_str(&format!("| MRR | {:.3} |\\n", report.retrieval.mrr)); - md.push_str(&format!( - "| NDCG | {:.3} |\\n", - report.retrieval.average_ndcg - )); - md.push_str(&format!( - "| Latency Avg / P50 / P95 (ms) | {:.1} / {} / {} |\\n", - report.retrieval.latency.avg, report.retrieval.latency.p50, report.retrieval.latency.p95 - )); - md.push_str(&format!( - "| Strategy | `{}` |\\n", - report.retrieval.strategy - )); - md.push_str(&format!( - "| Concurrency | {} |\\n", - report.retrieval.concurrency - )); + ) + .unwrap(); + write!(md, "| MRR | {:.3} |\\n", report.retrieval.mrr).unwrap(); + write!(md, "| NDCG | {:.3} |\\n", report.retrieval.average_ndcg).unwrap(); + write!(md, "| Latency Avg / P50 / P95 (ms) | {:.1} / {} / {} |\\n", report.retrieval.latency.avg, report.retrieval.latency.p50, report.retrieval.latency.p95).unwrap(); + write!(md, "| Strategy | `{}` |\\n", report.retrieval.strategy).unwrap(); + write!(md, "| Concurrency | {} |\\n", 
report.retrieval.concurrency).unwrap(); if report.retrieval.rerank_enabled { let pool = report .retrieval .rerank_pool_size.map_or_else(|| "?".into(), |size| size.to_string()); - md.push_str(&format!( - "| Rerank | enabled (pool {pool}, keep top {}) |\\n", - report.retrieval.rerank_keep_top - )); + write!(md, "| Rerank | enabled (pool {pool}, keep top {}) |\\n", report.retrieval.rerank_keep_top).unwrap(); } else { md.push_str("| Rerank | disabled |\\n"); } @@ -532,58 +477,36 @@ fn render_markdown(report: &EvaluationReport) -> String { if let Some(llm) = &report.llm { md.push_str("\\n## LLM Mode Metrics\\n\\n"); md.push_str("| Metric | Value |\\n| --- | --- |\\n"); - md.push_str(&format!("| Cases | {} |\\n", llm.cases)); - md.push_str(&format!("| Answered | {} |\\n", llm.answered)); - md.push_str(&format!("| Precision | {:.3} |\\n", llm.precision)); + write!(md, "| Cases | {} |\\n", llm.cases).unwrap(); + write!(md, "| Answered | {} |\\n", llm.answered).unwrap(); + write!(md, "| Precision | {:.3} |\\n", llm.precision).unwrap(); } md.push_str("\\n## Performance\\n\\n"); md.push_str("| Metric | Value |\\n| --- | --- |\\n"); - md.push_str(&format!( - "| OpenAI Base URL | {} |\\n", - report.performance.openai_base_url - )); - md.push_str(&format!( - "| Ingestion Duration | {} ms |\\n", - report.performance.ingestion_ms - )); + write!(md, "| OpenAI Base URL | {} |\\n", report.performance.openai_base_url).unwrap(); + write!(md, "| Ingestion Duration | {} ms |\\n", report.performance.ingestion_ms).unwrap(); if let Some(seed) = report.performance.namespace_seed_ms { - md.push_str(&format!("| Namespace Seed | {seed} ms |\\n")); + write!(md, "| Namespace Seed | {seed} ms |\\n").unwrap(); } - md.push_str(&format!( + write!( + md, "| Namespace State | {} |\\n", if report.performance.namespace_reused { "reused" } else { "seeded" } - )); - md.push_str(&format!( - "| Corpus Paragraphs | {} |\\n", - report.performance.corpus_paragraphs - )); + ) + .unwrap(); + write!(md, "| Corpus 
Paragraphs | {} |\\n", report.performance.corpus_paragraphs).unwrap(); if report.detailed_report { - md.push_str(&format!( - "| Ingestion Cache | `{}` |\\n", - report.performance.ingestion_cache_path - )); - md.push_str(&format!( - "| Ingestion Reused | {} |\\n", - bool_badge(report.performance.ingestion_reused) - )); - md.push_str(&format!( - "| Embeddings Reused | {} |\\n", - bool_badge(report.performance.embeddings_reused) - )); + write!(md, "| Ingestion Cache | `{}` |\\n", report.performance.ingestion_cache_path).unwrap(); + write!(md, "| Ingestion Reused | {} |\\n", bool_badge(report.performance.ingestion_reused)).unwrap(); + write!(md, "| Embeddings Reused | {} |\\n", bool_badge(report.performance.embeddings_reused)).unwrap(); } - md.push_str(&format!( - "| Positives Cached | {} |\\n", - report.performance.positive_paragraphs_reused - )); - md.push_str(&format!( - "| Negatives Cached | {} |\\n", - report.performance.negative_paragraphs_reused - )); + write!(md, "| Positives Cached | {} |\\n", report.performance.positive_paragraphs_reused).unwrap(); + write!(md, "| Negatives Cached | {} |\\n", report.performance.negative_paragraphs_reused).unwrap(); md.push_str("\\n## Retrieval Stage Timings\\n\\n"); md.push_str("| Stage | Avg (ms) | P50 (ms) | P95 (ms) |\\n| --- | --- | --- | --- |\\n"); @@ -635,7 +558,8 @@ fn render_markdown(report: &EvaluationReport) -> String { for case in &report.misses { let retrieved = render_retrieved(&case.retrieved); if report.detailed_report { - md.push_str(&format!( + write!( + md, "| `{}` | {} | `{}` | {} | {} | {} | {} |\\n", case.question_id, case.paragraph_title, @@ -644,12 +568,15 @@ fn render_markdown(report: &EvaluationReport) -> String { bool_badge(case.chunk_text_match), bool_badge(case.chunk_id_match), retrieved - )); + ) + .unwrap(); } else { - md.push_str(&format!( + write!( + md, "| `{}` | {} | `{}` | {} |\\n", case.question_id, case.paragraph_title, case.expected_source, retrieved - )); + ) + .unwrap(); } } } @@ 
-671,24 +598,29 @@ fn render_markdown(report: &EvaluationReport) -> String { let retrieved = render_retrieved(&case.retrieved); let rank = case .match_rank.map_or_else(|| "-".into(), |rank| rank.to_string()); - md.push_str(&format!( + write!( + md, "| `{}` | {} | {} | {} |\\n", case.question_id, bool_badge(case.answered), rank, retrieved - )); + ) + .unwrap(); } } } md } +#[allow(clippy::write_with_newline, clippy::unwrap_used)] fn write_stage_row(buf: &mut String, label: &str, stats: &LatencyStats) { - buf.push_str(&format!( - "| {} | {:.1} | {} | {} |\n", + writeln!( + buf, + "| {} | {:.1} | {} | {} |", label, stats.avg, stats.p50, stats.p95 - )); + ) + .unwrap(); } fn bool_badge(value: bool) -> &'static str { @@ -819,6 +751,7 @@ fn default_stage_latency() -> StageLatencyBreakdown { } } +#[allow(clippy::too_many_lines)] fn convert_legacy_entry(entry: LegacyHistoryEntry) -> EvaluationReport { let overview = OverviewSection { generated_at: entry.generated_at, @@ -987,6 +920,7 @@ mod tests { use chrono::Utc; use tempfile::tempdir; + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] fn latency(ms: f64) -> LatencyStats { LatencyStats { avg: ms, @@ -1087,7 +1021,7 @@ mod tests { retrieval_precision: 1.0, average_ndcg: 0.0, mrr: 0.0, - llm_cases: if include_llm { 1 } else { 0 }, + llm_cases: usize::from(include_llm), llm_answered: 0, llm_precision: 0.0, slice_id: "slice".into(), @@ -1161,6 +1095,7 @@ mod tests { assert!(!md.contains("LLM-Only Cases")); } + #[allow(clippy::unwrap_used, clippy::expect_used, clippy::indexing_slicing)] #[test] fn evaluations_history_captures_strategy_and_concurrency() { let tmp = tempdir().unwrap(); diff --git a/evaluations/src/settings.rs b/evaluations/src/settings.rs index ba8f611..e642681 100644 --- a/evaluations/src/settings.rs +++ b/evaluations/src/settings.rs @@ -10,6 +10,7 @@ use tracing::info; use crate::args::Config; /// Enforce evaluation-specific system settings overrides. 
+#[allow(clippy::cast_possible_truncation)] pub(crate) async fn enforce_system_settings( db: &SurrealDbClient, mut settings: SystemSettings, diff --git a/evaluations/src/slice.rs b/evaluations/src/slice.rs index db8bee5..bbf199a 100644 --- a/evaluations/src/slice.rs +++ b/evaluations/src/slice.rs @@ -1,5 +1,6 @@ use std::{ collections::{HashMap, HashSet, VecDeque}, + fmt::Write, fs, path::{Path, PathBuf}, }; @@ -98,10 +99,10 @@ fn sanitize_identifier(input: &str) -> String { let mut hasher = Sha256::new(); hasher.update(input.as_bytes()); let digest = hasher.finalize(); - digest[..6] - .iter() - .map(|byte| format!("{byte:02x}")) - .collect::() + digest.iter().take(6).fold(String::with_capacity(12), |mut s, b| { + let _ = write!(s, "{b:02x}"); + s + }) } else { trimmed } @@ -168,7 +169,7 @@ impl DatasetIndex { .paragraph_by_id .get(id) .ok_or_else(|| anyhow!("slice references unknown paragraph '{id}'"))?; - Ok(&dataset.paragraphs[*idx]) + dataset.paragraphs.get(*idx).ok_or_else(|| anyhow!("paragraph index out of bounds")) } fn question<'a>( @@ -180,7 +181,8 @@ impl DatasetIndex { .question_by_id .get(question_id) .ok_or_else(|| anyhow!("slice references unknown question '{question_id}'"))?; - let paragraph = &dataset.paragraphs[*p_idx]; + let paragraph = dataset.paragraphs.get(*p_idx) + .ok_or_else(|| anyhow!("paragraph index out of bounds for question '{question_id}'"))?; let question = paragraph .questions .get(*q_idx) @@ -205,6 +207,7 @@ struct BuildParams { rng_seed: u64, } +#[allow(clippy::too_many_lines)] pub fn resolve_slice<'a>( dataset: &'a ConvertedDataset, config: &SliceConfig<'_>, @@ -237,7 +240,7 @@ pub fn resolve_slice<'a>( requested_corpus, seed: config.slice_seed, }; - let slice_id = compute_slice_id(&key); + let slice_id = compute_slice_id(&key)?; let base = config .cache_dir .join("slices") @@ -360,7 +363,7 @@ pub fn resolve_slice<'a>( ¶ms, desired_negatives, requested_corpus, - )?; + ); refresh_manifest_stats(&mut manifest); if changed { @@ 
-390,6 +393,7 @@ pub fn resolve_slice<'a>( Ok(resolved) } +#[allow(clippy::indexing_slicing, clippy::arithmetic_side_effects)] pub fn select_window<'a>( resolved: &'a ResolvedSlice<'a>, offset: usize, @@ -404,9 +408,7 @@ pub fn select_window<'a>( } if offset >= total { return Err(anyhow!( - "slice offset {} exceeds available cases ({})", - offset, - total + "slice offset {offset} exceeds available cases ({total})", )); } let available = total - offset; @@ -516,6 +518,7 @@ fn empty_manifest( } } +#[allow(clippy::indexing_slicing, clippy::arithmetic_side_effects)] fn ensure_case_capacity( dataset: &ConvertedDataset, manifest: &mut SliceManifest, @@ -631,6 +634,7 @@ fn ordered_question_refs( Ok(question_refs) } +#[allow(clippy::too_many_lines, clippy::arithmetic_side_effects)] fn ordered_question_refs_beir( dataset: &ConvertedDataset, params: &BuildParams, @@ -795,20 +799,21 @@ fn question_prefix(question_id: &str) -> Option<&'static str> { None } +#[allow(clippy::indexing_slicing)] fn ensure_negative_pool( dataset: &ConvertedDataset, manifest: &mut SliceManifest, params: &BuildParams, target_negatives: usize, requested_corpus: usize, -) -> Result { +) -> bool { let current_negatives = manifest .paragraphs .iter() .filter(|entry| matches!(entry.kind, SliceParagraphKind::Negative)) .count(); if current_negatives >= target_negatives { - return Ok(false); + return false; } let positive_ids: HashSet = manifest @@ -816,7 +821,7 @@ fn ensure_negative_pool( .iter() .filter_map(|entry| match entry.kind { SliceParagraphKind::Positive { .. } => Some(entry.id.clone()), - _ => None, + SliceParagraphKind::Negative => None, }) .collect(); let mut negative_ids: HashSet = manifest @@ -824,7 +829,7 @@ fn ensure_negative_pool( .iter() .filter_map(|entry| match entry.kind { SliceParagraphKind::Negative => Some(entry.id.clone()), - _ => None, + SliceParagraphKind::Positive { .. 
} => None, }) .collect(); @@ -863,7 +868,7 @@ fn ensure_negative_pool( ); } - Ok(added) + added } fn ordered_negative_indices( @@ -914,6 +919,7 @@ fn ensure_shard_paths(manifest: &mut SliceManifest) -> bool { changed } +#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation, clippy::cast_sign_loss)] fn desired_negative_target( positive_count: usize, requested_corpus: usize, @@ -996,17 +1002,18 @@ fn manifest_to_resolved<'a>( }) } -fn compute_slice_id(key: &SliceKey<'_>) -> String { - let payload = serde_json::to_vec(key).expect("SliceKey serialisation should not fail"); +fn compute_slice_id(key: &SliceKey<'_>) -> Result { + let payload = serde_json::to_vec(key).context("SliceKey serialisation failed")?; let mut hasher = Sha256::new(); hasher.update(payload); let digest = hasher.finalize(); - digest[..16] - .iter() - .map(|byte| format!("{byte:02x}")) - .collect::() + Ok(digest.iter().take(16).fold(String::with_capacity(32), |mut s, b| { + let _ = write!(s, "{b:02x}"); + s + })) } +#[allow(clippy::indexing_slicing)] fn mix_seed(dataset_id: &str, seed: u64) -> u64 { let mut hasher = Sha256::new(); hasher.update(dataset_id.as_bytes()); @@ -1146,6 +1153,7 @@ mod tests { } #[test] + #[allow(clippy::indexing_slicing)] fn select_window_yields_expected_cases() -> Result<()> { let dataset = sample_dataset(); let temp = tempdir().context("creating temp directory")?; @@ -1177,6 +1185,7 @@ mod tests { } #[test] + #[allow(clippy::indexing_slicing)] fn beir_mix_balances_and_rebalances() -> Result<()> { let mut paragraphs = Vec::new(); let counts = [ diff --git a/evaluations/src/snapshot.rs b/evaluations/src/snapshot.rs index bd41c35..5b1a827 100644 --- a/evaluations/src/snapshot.rs +++ b/evaluations/src/snapshot.rs @@ -121,6 +121,7 @@ impl Descriptor { } } +#[allow(clippy::expect_used)] fn compute_hash(metadata: &SnapshotMetadata) -> String { let mut hasher = Sha256::new(); hasher.update( @@ -134,6 +135,7 @@ mod tests { use super::*; #[tokio::test] + 
#[allow(clippy::unwrap_used, clippy::expect_used)] async fn state_round_trip() { let temp_dir = tempfile::tempdir().unwrap(); let metadata = SnapshotMetadata { diff --git a/evaluations/src/types.rs b/evaluations/src/types.rs index 51fe19f..481b2ae 100644 --- a/evaluations/src/types.rs +++ b/evaluations/src/types.rs @@ -8,6 +8,7 @@ use retrieval_pipeline::{ use serde::{Deserialize, Serialize}; use unicode_normalization::UnicodeNormalization; +#[allow(clippy::struct_excessive_bools)] #[derive(Debug, Serialize)] pub struct EvaluationSummary { pub generated_at: DateTime, @@ -86,6 +87,7 @@ pub struct EvaluationSummary { pub cases: Vec, } +#[allow(clippy::struct_excessive_bools)] #[derive(Debug, Serialize)] pub struct CaseSummary { pub question_id: String, @@ -137,6 +139,7 @@ pub struct StageLatencyBreakdown { pub assemble: LatencyStats, } +#[allow(clippy::struct_field_names)] #[derive(Debug, Default, Clone, Serialize, Deserialize)] pub struct EvaluationStageTimings { pub prepare_slice_ms: u128, @@ -345,6 +348,7 @@ fn chunk_snippet(text: &str) -> String { acc } +#[allow(clippy::cast_precision_loss)] pub fn compute_latency_stats(latencies: &[u128]) -> LatencyStats { if latencies.is_empty() { return LatencyStats { @@ -386,6 +390,13 @@ pub fn build_stage_latency_breakdown(samples: &[PipelineStageTimings]) -> StageL } } +#[allow( + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + clippy::cast_precision_loss, + clippy::indexing_slicing, + clippy::arithmetic_side_effects +)] fn percentile(sorted: &[u128], fraction: f64) -> u128 { if sorted.is_empty() { return 0; @@ -395,6 +406,7 @@ fn percentile(sorted: &[u128], fraction: f64) -> u128 { sorted[idx.min(sorted.len() - 1)] } +#[allow(clippy::arithmetic_side_effects)] pub fn build_case_diagnostics( summary: &CaseSummary, expected_chunk_ids: &[String], diff --git a/html-router/assets/style.css b/html-router/assets/style.css index 3b94d07..3431eb7 100644 --- a/html-router/assets/style.css +++ 
b/html-router/assets/style.css @@ -44,7 +44,6 @@ --leading-snug: 1.375; --leading-relaxed: 1.625; --ease-out: cubic-bezier(0, 0, 0.2, 1); - --ease-in-out: cubic-bezier(0.4, 0, 0.2, 1); --animate-pulse: pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite; --default-transition-duration: 150ms; --default-transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1); @@ -285,37 +284,6 @@ } } } - .drawer-open { - > .drawer-side { - overflow-y: auto; - } - > .drawer-toggle { - display: none; - & ~ .drawer-side { - pointer-events: auto; - visibility: visible; - position: sticky; - display: block; - width: auto; - overscroll-behavior: auto; - opacity: 100%; - & > .drawer-overlay { - cursor: default; - background-color: transparent; - } - & > *:not(.drawer-overlay) { - translate: 0%; - [dir="rtl"] & { - translate: 0%; - } - } - } - &:checked ~ .drawer-side { - pointer-events: auto; - visibility: visible; - } - } - } .drawer-toggle { position: fixed; height: calc(0.25rem * 0); @@ -1074,22 +1042,6 @@ grid-row-start: 1; min-width: calc(0.25rem * 0); } - .chat-image { - grid-row: span 2 / span 2; - align-self: flex-end; - } - .chat-footer { - grid-row-start: 3; - display: flex; - gap: calc(0.25rem * 1); - font-size: 0.6875rem; - } - .chat-header { - grid-row-start: 1; - display: flex; - gap: calc(0.25rem * 1); - font-size: 0.6875rem; - } .container { width: 100%; @media (width >= 40rem) { @@ -1796,9 +1748,6 @@ .w-10 { width: calc(var(--spacing) * 10); } - .w-11 { - width: calc(var(--spacing) * 11); - } .w-11\/12 { width: calc(11/12 * 100%); } @@ -1862,9 +1811,6 @@ .flex-none { flex: none; } - .flex-shrink { - flex-shrink: 1; - } .flex-shrink-0 { flex-shrink: 0; } @@ -1877,13 +1823,6 @@ .grow { flex-grow: 1; } - .border-collapse { - border-collapse: collapse; - } - .-translate-y-1 { - --tw-translate-y: calc(var(--spacing) * -1); - translate: var(--tw-translate-x) var(--tw-translate-y); - } .-translate-y-1\/2 { --tw-translate-y: calc(calc(1/2 * 100%) * -1); translate: var(--tw-translate-x) 
var(--tw-translate-y); @@ -1956,9 +1895,6 @@ .justify-start { justify-content: flex-start; } - .gap-0 { - gap: calc(var(--spacing) * 0); - } .gap-0\.5 { gap: calc(var(--spacing) * 0.5); } @@ -2115,9 +2051,6 @@ .bg-transparent { background-color: transparent; } - .bg-warning { - background-color: var(--color-warning); - } .bg-warning\/10 { background-color: var(--color-warning); @supports (color: color-mix(in lab, red, red)) { @@ -2136,9 +2069,6 @@ .loading-spinner { mask-image: url("data:image/svg+xml,%3Csvg width='24' height='24' stroke='black' viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg'%3E%3Cg transform-origin='center'%3E%3Ccircle cx='12' cy='12' r='9.5' fill='none' stroke-width='3' stroke-linecap='round'%3E%3CanimateTransform attributeName='transform' type='rotate' from='0 12 12' to='360 12 12' dur='2s' repeatCount='indefinite'/%3E%3Canimate attributeName='stroke-dasharray' values='0,150;42,150;42,150' keyTimes='0;0.475;1' dur='1.5s' repeatCount='indefinite'/%3E%3Canimate attributeName='stroke-dashoffset' values='0;-16;-59' keyTimes='0;0.475;1' dur='1.5s' repeatCount='indefinite'/%3E%3C/circle%3E%3C/g%3E%3C/svg%3E"); } - .mask-repeat { - mask-repeat: repeat; - } .fill-current { fill: currentcolor; } @@ -2169,9 +2099,6 @@ .p-8 { padding: calc(var(--spacing) * 8); } - .px-1 { - padding-inline: calc(var(--spacing) * 1); - } .px-1\.5 { padding-inline: calc(var(--spacing) * 1.5); } @@ -2326,9 +2253,6 @@ --tw-tracking: var(--tracking-widest); letter-spacing: var(--tracking-widest); } - .text-wrap { - text-wrap: wrap; - } .break-words { overflow-wrap: break-word; } @@ -2395,17 +2319,6 @@ .italic { font-style: italic; } - .underline { - text-decoration-line: underline; - } - .swap-active { - .swap-off { - opacity: 0%; - } - .swap-on { - opacity: 100%; - } - } .opacity-0 { opacity: 0%; } @@ -2496,10 +2409,6 @@ --tw-duration: 300ms; transition-duration: 300ms; } - .ease-in-out { - --tw-ease: var(--ease-in-out); - transition-timing-function: var(--ease-in-out); 
- } .ease-out { --tw-ease: var(--ease-out); transition-timing-function: var(--ease-out); diff --git a/html-router/src/middlewares/response_middleware.rs b/html-router/src/middlewares/response_middleware.rs index 7374005..bd83f1c 100644 --- a/html-router/src/middlewares/response_middleware.rs +++ b/html-router/src/middlewares/response_middleware.rs @@ -10,7 +10,7 @@ use axum::{ use axum_htmx::{HxRequest, HX_TRIGGER}; use common::{ error::AppError, - utils::template_engine::{ProvidesTemplateEngine, TemplateEngine, Value}, + utils::template_engine::{ProvidesTemplateEngine, Value}, }; use minijinja::context; use serde::Serialize; diff --git a/html-router/src/routes/chat/message_response_stream.rs b/html-router/src/routes/chat/message_response_stream.rs index e54b16d..6f4cd8c 100644 --- a/html-router/src/routes/chat/message_response_stream.rs +++ b/html-router/src/routes/chat/message_response_stream.rs @@ -333,7 +333,7 @@ async fn prepare_chat_request( history: &[Message], ) -> Result< (async_openai::types::CreateChatCompletionRequest, Vec), - Sse> + Send>>>, + SseResponse, > { let rerank_lease = match state.reranker_pool.as_ref() { Some(pool) => pool.checkout().await, @@ -356,7 +356,7 @@ async fn prepare_chat_request( { Ok(result) => result, Err(_e) => { - return Err(Sse::new(create_error_stream("Failed to retrieve knowledge"))); + return Err(sse_with_keep_alive(create_error_stream("Failed to retrieve knowledge"))); } }; @@ -365,7 +365,7 @@ async fn prepare_chat_request( let context_json = match retrieval_result { retrieval_pipeline::StrategyOutput::Chunks(chunks) => chunks_to_chat_context(&chunks), retrieval_pipeline::StrategyOutput::Entities(entities) => { - retrieved_entities_to_json(entities) + retrieved_entities_to_json(&entities) } retrieval_pipeline::StrategyOutput::Search(search_result) => { chunks_to_chat_context(&search_result.chunks) @@ -374,10 +374,10 @@ async fn prepare_chat_request( let formatted_user_message = 
create_user_message_with_history(&context_json, history, &user_message.content); let Ok(settings) = SystemSettings::get_current(&state.db).await else { - return Err(Sse::new(create_error_stream("Failed to retrieve system settings"))); + return Err(sse_with_keep_alive(create_error_stream("Failed to retrieve system settings"))); }; let Ok(request) = create_chat_request(formatted_user_message, &settings) else { - return Err(Sse::new(create_error_stream("Failed to create chat request"))); + return Err(sse_with_keep_alive(create_error_stream("Failed to create chat request"))); }; Ok((request, allowed_reference_ids)) @@ -415,7 +415,7 @@ fn spawn_storage_task( Err(err) => { error!(error = %err, "Reference validation failed, storing answer without references"); let ai_message = Message::new( - user_message.conversation_id, + conversation_id.clone(), MessageRole::AI, answer, Some(Vec::new()), diff --git a/main/src/main.rs b/main/src/main.rs index 9083d10..2b634fa 100644 --- a/main/src/main.rs +++ b/main/src/main.rs @@ -161,6 +161,7 @@ mod tests { Router, }; use common::storage::{ + db::SurrealDbClient, store::StorageManager, types::{system_settings::SystemSettings, user::User}, }; @@ -195,9 +196,17 @@ mod tests { .expect("failed to create temp data directory"); let config = smoke_test_config(namespace, &database, &data_dir); - let services = crate::bootstrap::init_with_config(config.clone()).await?; + let services = crate::bootstrap::init_with_config(config.clone()) + .await + .expect("failed to init services"); - let session_store = Arc::new(services.db.create_session_store().await?); + let session_store = Arc::new( + services + .db + .create_session_store() + .await + .expect("failed to create session store"), + ); let html_state = HtmlState::new_with_resources(StateResources { db: Arc::clone(&services.db), @@ -224,7 +233,7 @@ mod tests { html_state, }); - (app, db, data_dir) + (app, services.db, data_dir) } fn assert_redirect_to(response: &Response, expected_location: 
&str) { @@ -289,22 +298,25 @@ mod tests { .oneshot( Request::builder() .uri("/api/v1/live") - .body(Body::empty())?, + .body(Body::empty()) + .expect("building live request"), ) - .await?; + .await + .expect("sending live request"); assert_eq!(response.status(), StatusCode::OK); let ready_response = app .oneshot( Request::builder() .uri("/api/v1/ready") - .body(Body::empty())?, + .body(Body::empty()) + .expect("building ready request"), ) - .await?; + .await + .expect("sending ready request"); assert_eq!(ready_response.status(), StatusCode::OK); tokio::fs::remove_dir_all(&data_dir).await.ok(); - Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)]