benchmarks: fin

2026-05-05 23:43:55 +02:00 · 2025-12-08 21:57:53 +01:00
parent 0cb1abc6db
commit a8d10f265c
39 changed files with 774 additions and 714 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -1,2 +1,2 @@
 [alias]
-eval = "run -p eval --"
+eval = "run -p evaluations --"
--- a/eval/src/eval/mod.rs
+++ b/eval/src/eval/mod.rs
@@ -1,560 +0,0 @@
-mod pipeline;
-mod types;
-
-pub use pipeline::run_evaluation;
-pub use types::*;
-
-use std::{collections::HashMap, path::Path};
-
-use anyhow::{anyhow, Context, Result};
-use chrono::{DateTime, SecondsFormat, Utc};
-use common::{
-    error::AppError,
-    storage::{
-        db::SurrealDbClient,
-        types::{system_settings::SystemSettings, user::User, StoredObject},
-    },
-};
-use serde::Deserialize;
-use tokio::io::AsyncWriteExt;
-use tracing::{info, warn};
-
-use crate::{
-    args::{self, Config},
-    datasets::{self, ConvertedDataset},
-    ingest,
-    slice::{self},
-    snapshot::{self, DbSnapshotState},
-};
-
-pub(crate) struct SeededCase {
-    question_id: String,
-    question: String,
-    expected_source: String,
-    answers: Vec<String>,
-    paragraph_id: String,
-    paragraph_title: String,
-    expected_chunk_ids: Vec<String>,
-    is_impossible: bool,
-    has_verified_chunks: bool,
-}
-
-pub(crate) fn cases_from_manifest(manifest: &ingest::CorpusManifest) -> Vec<SeededCase> {
-    let mut title_map = HashMap::new();
-    for paragraph in &manifest.paragraphs {
-        title_map.insert(paragraph.paragraph_id.as_str(), paragraph.title.clone());
-    }
-
-    let include_impossible = manifest.metadata.include_unanswerable;
-    let require_verified_chunks = manifest.metadata.require_verified_chunks;
-
-    manifest
-        .questions
-        .iter()
-        .filter(|question| {
-            should_include_question(question, include_impossible, require_verified_chunks)
-        })
-        .map(|question| {
-            let title = title_map
-                .get(question.paragraph_id.as_str())
-                .cloned()
-                .unwrap_or_else(|| "Untitled".to_string());
-            SeededCase {
-                question_id: question.question_id.clone(),
-                question: question.question_text.clone(),
-                expected_source: question.text_content_id.clone(),
-                answers: question.answers.clone(),
-                paragraph_id: question.paragraph_id.clone(),
-                paragraph_title: title,
-                expected_chunk_ids: question.matching_chunk_ids.clone(),
-                is_impossible: question.is_impossible,
-                has_verified_chunks: !question.matching_chunk_ids.is_empty(),
-            }
-        })
-        .collect()
-}
-
-fn should_include_question(
-    question: &ingest::CorpusQuestion,
-    include_impossible: bool,
-    require_verified_chunks: bool,
-) -> bool {
-    if !include_impossible && question.is_impossible {
-        return false;
-    }
-    if require_verified_chunks && question.matching_chunk_ids.is_empty() {
-        return false;
-    }
-    true
-}
-
-pub async fn grow_slice(dataset: &ConvertedDataset, config: &Config) -> Result<()> {
-    let ledger_limit = ledger_target(config);
-    let slice_settings = slice::slice_config_with_limit(config, ledger_limit);
-    let slice =
-        slice::resolve_slice(dataset, &slice_settings).context("resolving dataset slice")?;
-    info!(
-        slice = slice.manifest.slice_id.as_str(),
-        cases = slice.manifest.case_count,
-        positives = slice.manifest.positive_paragraphs,
-        negatives = slice.manifest.negative_paragraphs,
-        total_paragraphs = slice.manifest.total_paragraphs,
-        "Slice ledger ready"
-    );
-    println!(
-        "Slice `{}` now contains {} questions ({} positives, {} negatives)",
-        slice.manifest.slice_id,
-        slice.manifest.case_count,
-        slice.manifest.positive_paragraphs,
-        slice.manifest.negative_paragraphs
-    );
-    Ok(())
-}
-
-pub(crate) fn ledger_target(config: &Config) -> Option<usize> {
-    match (config.slice_grow, config.limit) {
-        (Some(grow), Some(limit)) => Some(limit.max(grow)),
-        (Some(grow), None) => Some(grow),
-        (None, limit) => limit,
-    }
-}
-
-pub(crate) async fn write_chunk_diagnostics(path: &Path, cases: &[CaseDiagnostics]) -> Result<()> {
-    args::ensure_parent(path)?;
-    let mut file = tokio::fs::File::create(path)
-        .await
-        .with_context(|| format!("creating diagnostics file {}", path.display()))?;
-    for case in cases {
-        let line = serde_json::to_vec(case).context("serialising chunk diagnostics entry")?;
-        file.write_all(&line).await?;
-        file.write_all(b"\n").await?;
-    }
-    file.flush().await?;
-    Ok(())
-}
-
-pub(crate) async fn warm_hnsw_cache(db: &SurrealDbClient, dimension: usize) -> Result<()> {
-    // Create a dummy embedding for cache warming
-    let dummy_embedding: Vec<f32> = (0..dimension).map(|i| (i as f32).sin()).collect();
-
-    info!("Warming HNSW caches with sample queries");
-
-    // Warm up chunk embedding index - just query the embedding table to load HNSW index
-    let _ = db
-        .client
-        .query(
-            r#"SELECT chunk_id
-               FROM text_chunk_embedding
-               WHERE embedding <|1,1|> $embedding
-               LIMIT 5"#,
-        )
-        .bind(("embedding", dummy_embedding.clone()))
-        .await
-        .context("warming text chunk HNSW cache")?;
-
-    // Warm up entity embedding index
-    let _ = db
-        .client
-        .query(
-            r#"SELECT entity_id
-               FROM knowledge_entity_embedding
-               WHERE embedding <|1,1|> $embedding
-               LIMIT 5"#,
-        )
-        .bind(("embedding", dummy_embedding))
-        .await
-        .context("warming knowledge entity HNSW cache")?;
-
-    info!("HNSW cache warming completed");
-    Ok(())
-}
-
-pub(crate) async fn ensure_eval_user(db: &SurrealDbClient) -> Result<User> {
-    let timestamp = datasets::base_timestamp();
-    let user = User {
-        id: "eval-user".to_string(),
-        created_at: timestamp,
-        updated_at: timestamp,
-        email: "eval-retrieval@minne.dev".to_string(),
-        password: "not-used".to_string(),
-        anonymous: false,
-        api_key: None,
-        admin: false,
-        timezone: "UTC".to_string(),
-    };
-
-    if let Some(existing) = db.get_item::<User>(&user.get_id()).await? {
-        return Ok(existing);
-    }
-
-    db.store_item(user.clone())
-        .await
-        .context("storing evaluation user")?;
-    Ok(user)
-}
-
-pub fn format_timestamp(timestamp: &DateTime<Utc>) -> String {
-    timestamp.to_rfc3339_opts(SecondsFormat::Secs, true)
-}
-
-pub(crate) fn sanitize_model_code(code: &str) -> String {
-    code.chars()
-        .map(|ch| {
-            if ch.is_ascii_alphanumeric() {
-                ch.to_ascii_lowercase()
-            } else {
-                '_'
-            }
-        })
-        .collect()
-}
-
-pub(crate) async fn connect_eval_db(
-    config: &Config,
-    namespace: &str,
-    database: &str,
-) -> Result<SurrealDbClient> {
-    match SurrealDbClient::new(
-        &config.db_endpoint,
-        &config.db_username,
-        &config.db_password,
-        namespace,
-        database,
-    )
-    .await
-    {
-        Ok(client) => {
-            info!(
-                endpoint = %config.db_endpoint,
-                namespace,
-                database,
-                auth = "root",
-                "Connected to SurrealDB"
-            );
-            Ok(client)
-        }
-        Err(root_err) => {
-            info!(
-                endpoint = %config.db_endpoint,
-                namespace,
-                database,
-                "Root authentication failed; trying namespace-level auth"
-            );
-            let namespace_client = SurrealDbClient::new_with_namespace_user(
-                &config.db_endpoint,
-                namespace,
-                &config.db_username,
-                &config.db_password,
-                database,
-            )
-            .await
-            .map_err(|ns_err| {
-                anyhow!(
-                    "failed to connect to SurrealDB via root ({root_err}) or namespace ({ns_err}) credentials"
-                )
-            })?;
-            info!(
-                endpoint = %config.db_endpoint,
-                namespace,
-                database,
-                auth = "namespace",
-                "Connected to SurrealDB"
-            );
-            Ok(namespace_client)
-        }
-    }
-}
-
-pub(crate) async fn namespace_has_corpus(db: &SurrealDbClient) -> Result<bool> {
-    #[derive(Deserialize)]
-    struct CountRow {
-        count: i64,
-    }
-
-    let mut response = db
-        .client
-        .query("SELECT count() AS count FROM text_chunk")
-        .await
-        .context("checking namespace corpus state")?;
-    let rows: Vec<CountRow> = response.take(0).unwrap_or_default();
-    Ok(rows.first().map(|row| row.count).unwrap_or(0) > 0)
-}
-
-pub(crate) async fn can_reuse_namespace(
-    db: &SurrealDbClient,
-    descriptor: &snapshot::Descriptor,
-    namespace: &str,
-    database: &str,
-    dataset_id: &str,
-    slice_id: &str,
-    ingestion_fingerprint: &str,
-    slice_case_count: usize,
-) -> Result<bool> {
-    let state = match descriptor.load_db_state().await? {
-        Some(state) => state,
-        None => {
-            info!("No namespace state recorded; reseeding corpus from cached shards");
-            return Ok(false);
-        }
-    };
-
-    if state.slice_case_count != slice_case_count {
-        info!(
-            requested_cases = slice_case_count,
-            stored_cases = state.slice_case_count,
-            "Skipping live namespace reuse; cached state does not match requested window"
-        );
-        return Ok(false);
-    }
-
-    if state.dataset_id != dataset_id
-        || state.slice_id != slice_id
-        || state.ingestion_fingerprint != ingestion_fingerprint
-        || state.namespace.as_deref() != Some(namespace)
-        || state.database.as_deref() != Some(database)
-    {
-        info!(
-            namespace,
-            database, "Cached namespace metadata mismatch; rebuilding corpus from ingestion cache"
-        );
-        return Ok(false);
-    }
-
-    if namespace_has_corpus(db).await? {
-        Ok(true)
-    } else {
-        info!(
-            namespace,
-            database,
-            "Namespace metadata matches but tables are empty; reseeding from ingestion cache"
-        );
-        Ok(false)
-    }
-}
-
-fn sanitize_identifier(input: &str) -> String {
-    let mut cleaned: String = input
-        .chars()
-        .map(|ch| {
-            if ch.is_ascii_alphanumeric() {
-                ch.to_ascii_lowercase()
-            } else {
-                '_'
-            }
-        })
-        .collect();
-    if cleaned.is_empty() {
-        cleaned.push('x');
-    }
-    if cleaned.len() > 64 {
-        cleaned.truncate(64);
-    }
-    cleaned
-}
-
-pub(crate) fn default_namespace(dataset_id: &str, limit: Option<usize>) -> String {
-    let dataset_component = sanitize_identifier(dataset_id);
-    let limit_component = match limit {
-        Some(value) if value > 0 => format!("limit{}", value),
-        _ => "all".to_string(),
-    };
-    format!("eval_{}_{}", dataset_component, limit_component)
-}
-
-pub(crate) fn default_database() -> String {
-    "retrieval_eval".to_string()
-}
-
-pub(crate) async fn record_namespace_state(
-    descriptor: &snapshot::Descriptor,
-    dataset_id: &str,
-    slice_id: &str,
-    ingestion_fingerprint: &str,
-    namespace: &str,
-    database: &str,
-    slice_case_count: usize,
-) {
-    let state = DbSnapshotState {
-        dataset_id: dataset_id.to_string(),
-        slice_id: slice_id.to_string(),
-        ingestion_fingerprint: ingestion_fingerprint.to_string(),
-        snapshot_hash: descriptor.metadata_hash().to_string(),
-        updated_at: Utc::now(),
-        namespace: Some(namespace.to_string()),
-        database: Some(database.to_string()),
-        slice_case_count,
-    };
-    if let Err(err) = descriptor.store_db_state(&state).await {
-        warn!(error = %err, "Failed to record namespace state");
-    }
-}
-
-pub(crate) async fn enforce_system_settings(
-    db: &SurrealDbClient,
-    mut settings: SystemSettings,
-    provider_dimension: usize,
-    config: &Config,
-) -> Result<SystemSettings> {
-    let mut updated_settings = settings.clone();
-    let mut needs_settings_update = false;
-
-    if provider_dimension != settings.embedding_dimensions as usize {
-        updated_settings.embedding_dimensions = provider_dimension as u32;
-        needs_settings_update = true;
-    }
-    if let Some(query_override) = config.query_model.as_deref() {
-        if settings.query_model != query_override {
-            info!(
-                model = query_override,
-                "Overriding system query model for this run"
-            );
-            updated_settings.query_model = query_override.to_string();
-            needs_settings_update = true;
-        }
-    }
-    if needs_settings_update {
-        settings = SystemSettings::update(db, updated_settings)
-            .await
-            .context("updating system settings overrides")?;
-    }
-    Ok(settings)
-}
-
-pub(crate) async fn load_or_init_system_settings(
-    db: &SurrealDbClient,
-    _dimension: usize,
-) -> Result<(SystemSettings, bool)> {
-    match SystemSettings::get_current(db).await {
-        Ok(settings) => Ok((settings, false)),
-        Err(AppError::NotFound(_)) => {
-            info!("System settings missing; applying database migrations for namespace");
-            db.apply_migrations()
-                .await
-                .context("applying database migrations after missing system settings")?;
-            let settings = SystemSettings::get_current(db)
-                .await
-                .context("loading system settings after migrations")?;
-            Ok((settings, true))
-        }
-        Err(err) => Err(err).context("loading system settings"),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::ingest::store::{CorpusParagraph, EmbeddedKnowledgeEntity, EmbeddedTextChunk};
-    use crate::ingest::{CorpusManifest, CorpusMetadata, CorpusQuestion, MANIFEST_VERSION};
-    use chrono::Utc;
-    use common::storage::types::text_content::TextContent;
-
-    fn sample_manifest() -> CorpusManifest {
-        let paragraphs = vec![
-            CorpusParagraph {
-                paragraph_id: "p1".to_string(),
-                title: "Alpha".to_string(),
-                text_content: TextContent::new(
-                    "alpha context".to_string(),
-                    None,
-                    "test".to_string(),
-                    None,
-                    None,
-                    "user".to_string(),
-                ),
-                entities: Vec::<EmbeddedKnowledgeEntity>::new(),
-                relationships: Vec::new(),
-                chunks: Vec::<EmbeddedTextChunk>::new(),
-            },
-            CorpusParagraph {
-                paragraph_id: "p2".to_string(),
-                title: "Beta".to_string(),
-                text_content: TextContent::new(
-                    "beta context".to_string(),
-                    None,
-                    "test".to_string(),
-                    None,
-                    None,
-                    "user".to_string(),
-                ),
-                entities: Vec::<EmbeddedKnowledgeEntity>::new(),
-                relationships: Vec::new(),
-                chunks: Vec::<EmbeddedTextChunk>::new(),
-            },
-        ];
-        let questions = vec![
-            CorpusQuestion {
-                question_id: "q1".to_string(),
-                paragraph_id: "p1".to_string(),
-                text_content_id: "tc-alpha".to_string(),
-                question_text: "What is Alpha?".to_string(),
-                answers: vec!["Alpha".to_string()],
-                is_impossible: false,
-                matching_chunk_ids: vec!["chunk-alpha".to_string()],
-            },
-            CorpusQuestion {
-                question_id: "q2".to_string(),
-                paragraph_id: "p1".to_string(),
-                text_content_id: "tc-alpha".to_string(),
-                question_text: "Unanswerable?".to_string(),
-                answers: Vec::new(),
-                is_impossible: true,
-                matching_chunk_ids: Vec::new(),
-            },
-            CorpusQuestion {
-                question_id: "q3".to_string(),
-                paragraph_id: "p2".to_string(),
-                text_content_id: "tc-beta".to_string(),
-                question_text: "Where is Beta?".to_string(),
-                answers: vec!["Beta".to_string()],
-                is_impossible: false,
-                matching_chunk_ids: Vec::new(),
-            },
-        ];
-        CorpusManifest {
-            version: MANIFEST_VERSION,
-            metadata: CorpusMetadata {
-                dataset_id: "ds".to_string(),
-                dataset_label: "Dataset".to_string(),
-                slice_id: "slice".to_string(),
-                include_unanswerable: true,
-                require_verified_chunks: true,
-                ingestion_fingerprint: "fp".to_string(),
-                embedding_backend: "test".to_string(),
-                embedding_model: None,
-                embedding_dimension: 3,
-                converted_checksum: "chk".to_string(),
-                generated_at: Utc::now(),
-                paragraph_count: paragraphs.len(),
-                question_count: questions.len(),
-                chunk_min_tokens: 1,
-                chunk_max_tokens: 10,
-                chunk_only: false,
-            },
-            paragraphs,
-            questions,
-        }
-    }
-
-    #[test]
-    fn cases_respect_mode_filters() {
-        let mut manifest = sample_manifest();
-        manifest.metadata.include_unanswerable = false;
-        manifest.metadata.require_verified_chunks = true;
-
-        let strict_cases = cases_from_manifest(&manifest);
-        assert_eq!(strict_cases.len(), 1);
-        assert_eq!(strict_cases[0].question_id, "q1");
-        assert_eq!(strict_cases[0].paragraph_title, "Alpha");
-
-        let mut llm_manifest = manifest.clone();
-        llm_manifest.metadata.include_unanswerable = true;
-        llm_manifest.metadata.require_verified_chunks = false;
-
-        let llm_cases = cases_from_manifest(&llm_manifest);
-        let ids: Vec<_> = llm_cases
-            .iter()
-            .map(|case| case.question_id.as_str())
-            .collect();
-        assert_eq!(ids, vec!["q1", "q2", "q3"]);
-    }
-}
--- a/eval/src/slice.rs
+++ b/eval/src/slice.rs
@@ -1,28 +0,0 @@
-use crate::slices::SliceConfig as CoreSliceConfig;
-
-pub use crate::slices::*;
-
-use crate::args::Config;
-
-impl<'a> From<&'a Config> for CoreSliceConfig<'a> {
-    fn from(config: &'a Config) -> Self {
-        slice_config_with_limit(config, None)
-    }
-}
-
-pub fn slice_config_with_limit<'a>(
-    config: &'a Config,
-    limit_override: Option<usize>,
-) -> CoreSliceConfig<'a> {
-    CoreSliceConfig {
-        cache_dir: config.cache_dir.as_path(),
-        force_convert: config.force_convert,
-        explicit_slice: config.slice.as_deref(),
-        limit: limit_override.or(config.limit),
-        corpus_limit: config.corpus_limit,
-        slice_seed: config.slice_seed,
-        llm_mode: config.llm_mode,
-        negative_multiplier: config.negative_multiplier,
-        require_verified_chunks: config.retrieval.require_verified_chunks,
-    }
-}
--- a/evaluations/Cargo.toml
+++ b/evaluations/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "eval"
+name = "evaluations"
 version = "0.1.0"
 edition = "2021"

--- a/evaluations/manifest.yaml
+++ b/evaluations/manifest.yaml
--- a/evaluations/src/args.rs
+++ b/evaluations/src/args.rs
@@ -15,15 +15,15 @@ fn workspace_root() -> PathBuf {
 }

 fn default_report_dir() -> PathBuf {
-    workspace_root().join("eval/reports")
+    workspace_root().join("evaluations/reports")
 }

 fn default_cache_dir() -> PathBuf {
-    workspace_root().join("eval/cache")
+    workspace_root().join("evaluations/cache")
 }

 fn default_ingestion_cache_dir() -> PathBuf {
-    workspace_root().join("eval/cache/ingested")
+    workspace_root().join("evaluations/cache/ingested")
 }

 pub const DEFAULT_SLICE_SEED: u64 = 0x5eed_2025;
@@ -135,6 +135,72 @@ impl Default for RetrievalSettings {
    }
 }

+#[derive(Debug, Clone, Args)]
+pub struct IngestConfig {
+    /// Directory for ingestion corpora caches
+    #[arg(long, default_value_os_t = default_ingestion_cache_dir())]
+    pub ingestion_cache_dir: PathBuf,
+
+    /// Minimum tokens per chunk for ingestion
+    #[arg(long, default_value_t = 256)]
+    pub ingest_chunk_min_tokens: usize,
+
+    /// Maximum tokens per chunk for ingestion
+    #[arg(long, default_value_t = 512)]
+    pub ingest_chunk_max_tokens: usize,
+
+    /// Overlap between chunks during ingestion (tokens)
+    #[arg(long, default_value_t = 50)]
+    pub ingest_chunk_overlap_tokens: usize,
+
+    /// Run ingestion in chunk-only mode (skip analyzer/graph generation)
+    #[arg(long)]
+    pub ingest_chunks_only: bool,
+
+    /// Number of paragraphs to ingest concurrently
+    #[arg(long, default_value_t = 10)]
+    pub ingestion_batch_size: usize,
+
+    /// Maximum retries for ingestion failures per paragraph
+    #[arg(long, default_value_t = 3)]
+    pub ingestion_max_retries: usize,
+
+    /// Recompute embeddings for cached corpora without re-running ingestion
+    #[arg(long, alias = "refresh-embeddings")]
+    pub refresh_embeddings_only: bool,
+
+    /// Delete cached paragraph shards before rebuilding the ingestion corpus
+    #[arg(long)]
+    pub slice_reset_ingestion: bool,
+}
+
+#[derive(Debug, Clone, Args)]
+pub struct DatabaseArgs {
+    /// SurrealDB server endpoint
+    #[arg(long, default_value = "ws://127.0.0.1:8000", env = "EVAL_DB_ENDPOINT")]
+    pub db_endpoint: String,
+
+    /// SurrealDB root username
+    #[arg(long, default_value = "root_user", env = "EVAL_DB_USERNAME")]
+    pub db_username: String,
+
+    /// SurrealDB root password
+    #[arg(long, default_value = "root_password", env = "EVAL_DB_PASSWORD")]
+    pub db_password: String,
+
+    /// Override the namespace used on the SurrealDB server
+    #[arg(long, env = "EVAL_DB_NAMESPACE")]
+    pub db_namespace: Option<String>,
+
+    /// Override the database used on the SurrealDB server
+    #[arg(long, env = "EVAL_DB_DATABASE")]
+    pub db_database: Option<String>,
+
+    /// Path to inspect DB state
+    #[arg(long)]
+    pub inspect_db_state: Option<PathBuf>,
+}
+
 #[derive(Parser, Debug, Clone)]
 #[command(author, version, about, long_about = None)]
 pub struct Config {
@@ -205,37 +271,8 @@ pub struct Config {
    #[arg(long, default_value_os_t = default_cache_dir())]
    pub cache_dir: PathBuf,

-    /// Directory for ingestion corpora caches
-    #[arg(long, default_value_os_t = default_ingestion_cache_dir())]
-    pub ingestion_cache_dir: PathBuf,
-
-    /// Minimum tokens per chunk for ingestion
-    #[arg(long, default_value_t = 256)]
-    pub ingest_chunk_min_tokens: usize,
-
-    /// Maximum tokens per chunk for ingestion
-    #[arg(long, default_value_t = 512)]
-    pub ingest_chunk_max_tokens: usize,
-
-    /// Overlap between chunks during ingestion (tokens)
-    #[arg(long, default_value_t = 50)]
-    pub ingest_chunk_overlap_tokens: usize,
-
-    /// Run ingestion in chunk-only mode (skip analyzer/graph generation)
-    #[arg(long)]
-    pub ingest_chunks_only: bool,
-
-    /// Number of paragraphs to ingest concurrently
-    #[arg(long, default_value_t = 10)]
-    pub ingestion_batch_size: usize,
-
-    /// Maximum retries for ingestion failures per paragraph
-    #[arg(long, default_value_t = 3)]
-    pub ingestion_max_retries: usize,
-
-    /// Recompute embeddings for cached corpora without re-running ingestion
-    #[arg(long, alias = "refresh-embeddings")]
-    pub refresh_embeddings_only: bool,
+    #[command(flatten)]
+    pub ingest: IngestConfig,

    /// Include entity descriptions and categories in JSON reports
    #[arg(long)]
@@ -261,12 +298,8 @@ pub struct Config {
    #[arg(long, default_value_t = 0)]
    pub slice_offset: usize,

-    /// Delete cached paragraph shards before rebuilding the ingestion corpus
-    #[arg(long)]
-    pub slice_reset_ingestion: bool,
-
    /// Target negative-to-positive paragraph ratio for slice growth
-    #[arg(long, default_value_t = crate::slices::DEFAULT_NEGATIVE_MULTIPLIER)]
+    #[arg(long, default_value_t = crate::slice::DEFAULT_NEGATIVE_MULTIPLIER)]
    pub negative_multiplier: f32,

    /// Annotate the run; label is stored in JSON/Markdown reports
@@ -301,29 +334,8 @@ pub struct Config {
    #[arg(long, alias = "perf-log")]
    pub perf_log_console: bool,

-    /// SurrealDB server endpoint
-    #[arg(long, default_value = "ws://127.0.0.1:8000", env = "EVAL_DB_ENDPOINT")]
-    pub db_endpoint: String,
-
-    /// SurrealDB root username
-    #[arg(long, default_value = "root_user", env = "EVAL_DB_USERNAME")]
-    pub db_username: String,
-
-    /// SurrealDB root password
-    #[arg(long, default_value = "root_password", env = "EVAL_DB_PASSWORD")]
-    pub db_password: String,
-
-    /// Override the namespace used on the SurrealDB server
-    #[arg(long, env = "EVAL_DB_NAMESPACE")]
-    pub db_namespace: Option<String>,
-
-    /// Override the database used on the SurrealDB server
-    #[arg(long, env = "EVAL_DB_DATABASE")]
-    pub db_database: Option<String>,
-
-    /// Path to inspect DB state
-    #[arg(long)]
-    pub inspect_db_state: Option<PathBuf>,
+    #[command(flatten)]
+    pub database: DatabaseArgs,

    // Computed fields (not arguments)
    #[arg(skip)]
@@ -377,21 +389,21 @@ impl Config {
        }

        // Validations
-        if self.ingest_chunk_min_tokens == 0
-            || self.ingest_chunk_min_tokens >= self.ingest_chunk_max_tokens
+        if self.ingest.ingest_chunk_min_tokens == 0
+            || self.ingest.ingest_chunk_min_tokens >= self.ingest.ingest_chunk_max_tokens
        {
            return Err(anyhow!(
                "--ingest-chunk-min-tokens must be greater than zero and less than --ingest-chunk-max-tokens (got {} >= {})",
-                self.ingest_chunk_min_tokens,
-                self.ingest_chunk_max_tokens
+                self.ingest.ingest_chunk_min_tokens,
+                self.ingest.ingest_chunk_max_tokens
            ));
        }

-        if self.ingest_chunk_overlap_tokens >= self.ingest_chunk_min_tokens {
+        if self.ingest.ingest_chunk_overlap_tokens >= self.ingest.ingest_chunk_min_tokens {
            return Err(anyhow!(
                "--ingest-chunk-overlap-tokens ({}) must be less than --ingest-chunk-min-tokens ({})",
-                self.ingest_chunk_overlap_tokens,
-                self.ingest_chunk_min_tokens
+                self.ingest.ingest_chunk_overlap_tokens,
+                self.ingest.ingest_chunk_min_tokens
            ));
        }

--- a/evaluations/src/cache.rs
+++ b/evaluations/src/cache.rs
--- a/evaluations/src/cases.rs
+++ b/evaluations/src/cases.rs
@@ -0,0 +1,187 @@
+//! Case generation from corpus manifests.
+
+use std::collections::HashMap;
+
+use crate::corpus;
+
+/// A test case for retrieval evaluation derived from a manifest question.
+pub(crate) struct SeededCase {
+    pub question_id: String,
+    pub question: String,
+    pub expected_source: String,
+    pub answers: Vec<String>,
+    pub paragraph_id: String,
+    pub paragraph_title: String,
+    pub expected_chunk_ids: Vec<String>,
+    pub is_impossible: bool,
+    pub has_verified_chunks: bool,
+}
+
+/// Convert a corpus manifest into seeded evaluation cases.
+pub(crate) fn cases_from_manifest(manifest: &corpus::CorpusManifest) -> Vec<SeededCase> {
+    let mut title_map = HashMap::new();
+    for paragraph in &manifest.paragraphs {
+        title_map.insert(paragraph.paragraph_id.as_str(), paragraph.title.clone());
+    }
+
+    let include_impossible = manifest.metadata.include_unanswerable;
+    let require_verified_chunks = manifest.metadata.require_verified_chunks;
+
+    manifest
+        .questions
+        .iter()
+        .filter(|question| {
+            should_include_question(question, include_impossible, require_verified_chunks)
+        })
+        .map(|question| {
+            let title = title_map
+                .get(question.paragraph_id.as_str())
+                .cloned()
+                .unwrap_or_else(|| "Untitled".to_string());
+            SeededCase {
+                question_id: question.question_id.clone(),
+                question: question.question_text.clone(),
+                expected_source: question.text_content_id.clone(),
+                answers: question.answers.clone(),
+                paragraph_id: question.paragraph_id.clone(),
+                paragraph_title: title,
+                expected_chunk_ids: question.matching_chunk_ids.clone(),
+                is_impossible: question.is_impossible,
+                has_verified_chunks: !question.matching_chunk_ids.is_empty(),
+            }
+        })
+        .collect()
+}
+
+fn should_include_question(
+    question: &corpus::CorpusQuestion,
+    include_impossible: bool,
+    require_verified_chunks: bool,
+) -> bool {
+    if !include_impossible && question.is_impossible {
+        return false;
+    }
+    if require_verified_chunks && question.matching_chunk_ids.is_empty() {
+        return false;
+    }
+    true
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::corpus::store::{CorpusParagraph, EmbeddedKnowledgeEntity, EmbeddedTextChunk};
+    use crate::corpus::{CorpusManifest, CorpusMetadata, CorpusQuestion, MANIFEST_VERSION};
+    use chrono::Utc;
+    use common::storage::types::text_content::TextContent;
+
+    fn sample_manifest() -> CorpusManifest {
+        let paragraphs = vec![
+            CorpusParagraph {
+                paragraph_id: "p1".to_string(),
+                title: "Alpha".to_string(),
+                text_content: TextContent::new(
+                    "alpha context".to_string(),
+                    None,
+                    "test".to_string(),
+                    None,
+                    None,
+                    "user".to_string(),
+                ),
+                entities: Vec::<EmbeddedKnowledgeEntity>::new(),
+                relationships: Vec::new(),
+                chunks: Vec::<EmbeddedTextChunk>::new(),
+            },
+            CorpusParagraph {
+                paragraph_id: "p2".to_string(),
+                title: "Beta".to_string(),
+                text_content: TextContent::new(
+                    "beta context".to_string(),
+                    None,
+                    "test".to_string(),
+                    None,
+                    None,
+                    "user".to_string(),
+                ),
+                entities: Vec::<EmbeddedKnowledgeEntity>::new(),
+                relationships: Vec::new(),
+                chunks: Vec::<EmbeddedTextChunk>::new(),
+            },
+        ];
+        let questions = vec![
+            CorpusQuestion {
+                question_id: "q1".to_string(),
+                paragraph_id: "p1".to_string(),
+                text_content_id: "tc-alpha".to_string(),
+                question_text: "What is Alpha?".to_string(),
+                answers: vec!["Alpha".to_string()],
+                is_impossible: false,
+                matching_chunk_ids: vec!["chunk-alpha".to_string()],
+            },
+            CorpusQuestion {
+                question_id: "q2".to_string(),
+                paragraph_id: "p1".to_string(),
+                text_content_id: "tc-alpha".to_string(),
+                question_text: "Unanswerable?".to_string(),
+                answers: Vec::new(),
+                is_impossible: true,
+                matching_chunk_ids: Vec::new(),
+            },
+            CorpusQuestion {
+                question_id: "q3".to_string(),
+                paragraph_id: "p2".to_string(),
+                text_content_id: "tc-beta".to_string(),
+                question_text: "Where is Beta?".to_string(),
+                answers: vec!["Beta".to_string()],
+                is_impossible: false,
+                matching_chunk_ids: Vec::new(),
+            },
+        ];
+        CorpusManifest {
+            version: MANIFEST_VERSION,
+            metadata: CorpusMetadata {
+                dataset_id: "ds".to_string(),
+                dataset_label: "Dataset".to_string(),
+                slice_id: "slice".to_string(),
+                include_unanswerable: true,
+                require_verified_chunks: true,
+                ingestion_fingerprint: "fp".to_string(),
+                embedding_backend: "test".to_string(),
+                embedding_model: None,
+                embedding_dimension: 3,
+                converted_checksum: "chk".to_string(),
+                generated_at: Utc::now(),
+                paragraph_count: paragraphs.len(),
+                question_count: questions.len(),
+                chunk_min_tokens: 1,
+                chunk_max_tokens: 10,
+                chunk_only: false,
+            },
+            paragraphs,
+            questions,
+        }
+    }
+
+    #[test]
+    fn cases_respect_mode_filters() {
+        let mut manifest = sample_manifest();
+        manifest.metadata.include_unanswerable = false;
+        manifest.metadata.require_verified_chunks = true;
+
+        let strict_cases = cases_from_manifest(&manifest);
+        assert_eq!(strict_cases.len(), 1);
+        assert_eq!(strict_cases[0].question_id, "q1");
+        assert_eq!(strict_cases[0].paragraph_title, "Alpha");
+
+        let mut llm_manifest = manifest.clone();
+        llm_manifest.metadata.include_unanswerable = true;
+        llm_manifest.metadata.require_verified_chunks = false;
+
+        let llm_cases = cases_from_manifest(&llm_manifest);
+        let ids: Vec<_> = llm_cases
+            .iter()
+            .map(|case| case.question_id.as_str())
+            .collect();
+        assert_eq!(ids, vec!["q1", "q2", "q3"]);
+    }
+}
--- a/evaluations/src/corpus/config.rs
+++ b/evaluations/src/corpus/config.rs
@@ -32,11 +32,11 @@ impl CorpusCacheConfig {
 impl From<&Config> for CorpusCacheConfig {
    fn from(config: &Config) -> Self {
        CorpusCacheConfig::new(
-            config.ingestion_cache_dir.clone(),
-            config.force_convert || config.slice_reset_ingestion,
-            config.refresh_embeddings_only,
-            config.ingestion_batch_size,
-            config.ingestion_max_retries,
+            config.ingest.ingestion_cache_dir.clone(),
+            config.force_convert || config.ingest.slice_reset_ingestion,
+            config.ingest.refresh_embeddings_only,
+            config.ingest.ingestion_batch_size,
+            config.ingest.ingestion_max_retries,
        )
    }
 }
--- a/evaluations/src/corpus/mod.rs
+++ b/evaluations/src/corpus/mod.rs
@@ -15,12 +15,12 @@ pub use store::{

 pub fn make_ingestion_config(config: &crate::args::Config) -> ingestion_pipeline::IngestionConfig {
    let mut tuning = ingestion_pipeline::IngestionTuning::default();
-    tuning.chunk_min_tokens = config.ingest_chunk_min_tokens;
-    tuning.chunk_max_tokens = config.ingest_chunk_max_tokens;
-    tuning.chunk_overlap_tokens = config.ingest_chunk_overlap_tokens;
+    tuning.chunk_min_tokens = config.ingest.ingest_chunk_min_tokens;
+    tuning.chunk_max_tokens = config.ingest.ingest_chunk_max_tokens;
+    tuning.chunk_overlap_tokens = config.ingest.ingest_chunk_overlap_tokens;

    ingestion_pipeline::IngestionConfig {
        tuning,
-        chunk_only: config.ingest_chunks_only,
+        chunk_only: config.ingest.ingest_chunks_only,
    }
 }
--- a/evaluations/src/corpus/orchestrator.rs
+++ b/evaluations/src/corpus/orchestrator.rs
@@ -26,10 +26,10 @@ use uuid::Uuid;

 use crate::{
    datasets::{ConvertedDataset, ConvertedParagraph, ConvertedQuestion},
-    slices::{self, ResolvedSlice, SliceParagraphKind},
+    slice::{self, ResolvedSlice, SliceParagraphKind},
 };

-use crate::ingest::{
+use crate::corpus::{
    CorpusCacheConfig, CorpusHandle, CorpusManifest, CorpusMetadata, CorpusQuestion,
    EmbeddedKnowledgeEntity, EmbeddedTextChunk, ParagraphShard, ParagraphShardStore,
    MANIFEST_VERSION,
@@ -58,12 +58,12 @@ impl<'a> IngestRequest<'a> {
    fn from_entry(
        slot: usize,
        paragraph: &'a ConvertedParagraph,
-        entry: &'a slices::SliceParagraphEntry,
+        entry: &'a slice::SliceParagraphEntry,
    ) -> Result<Self> {
        let shard_path = entry
            .shard_path
            .clone()
-            .unwrap_or_else(|| slices::default_shard_path(&entry.id));
+            .unwrap_or_else(|| slice::default_shard_path(&entry.id));
        let question_refs = match &entry.kind {
            SliceParagraphKind::Positive { question_ids } => question_ids
                .iter()
@@ -94,7 +94,7 @@ impl<'a> IngestRequest<'a> {

 struct ParagraphPlan<'a> {
    slot: usize,
-    entry: &'a slices::SliceParagraphEntry,
+    entry: &'a slice::SliceParagraphEntry,
    paragraph: &'a ConvertedParagraph,
 }

@@ -109,7 +109,7 @@ struct IngestionStats {
 pub async fn ensure_corpus(
    dataset: &ConvertedDataset,
    slice: &ResolvedSlice<'_>,
-    window: &slices::SliceWindow<'_>,
+    window: &slice::SliceWindow<'_>,
    cache: &CorpusCacheConfig,
    embedding: Arc<common::utils::embedding::EmbeddingProvider>,
    openai: Arc<OpenAIClient>,
@@ -189,7 +189,7 @@ pub async fn ensure_corpus(
            .entry
            .shard_path
            .clone()
-            .unwrap_or_else(|| slices::default_shard_path(&plan_entry.entry.id));
+            .unwrap_or_else(|| slice::default_shard_path(&plan_entry.entry.id));
        let shard = if cache.force_refresh {
            None
        } else {
@@ -683,7 +683,7 @@ mod tests {
    use super::*;
    use crate::{
        datasets::{ConvertedDataset, ConvertedParagraph, ConvertedQuestion, DatasetKind},
-        slices::{CaseRef, SliceCaseEntry, SliceManifest, SliceParagraphEntry, SliceParagraphKind},
+        slice::{CaseRef, SliceCaseEntry, SliceManifest, SliceParagraphEntry, SliceParagraphKind},
    };
    use chrono::Utc;

--- a/evaluations/src/corpus/store.rs
+++ b/evaluations/src/corpus/store.rs
--- a/evaluations/src/datasets/beir.rs
+++ b/evaluations/src/datasets/beir.rs
--- a/evaluations/src/datasets/mod.rs
+++ b/evaluations/src/datasets/mod.rs
--- a/evaluations/src/datasets/nq.rs
+++ b/evaluations/src/datasets/nq.rs
--- a/evaluations/src/datasets/squad.rs
+++ b/evaluations/src/datasets/squad.rs
--- a/evaluations/src/db_helpers.rs
+++ b/evaluations/src/db_helpers.rs
@@ -2,16 +2,6 @@ use anyhow::{Context, Result};
 use common::storage::{db::SurrealDbClient, indexes::ensure_runtime_indexes};
 use tracing::info;

-// Remove and recreate HNSW indexes for changing embedding lengths, used at beginning if embedding length differs from default system settings.
-pub async fn change_embedding_length_in_hnsw_indexes(
-    db: &SurrealDbClient,
-    dimension: usize,
-) -> Result<()> {
-    // No-op for now; runtime indexes are created after ingestion with the correct dimension.
-    let _ = (db, dimension);
-    Ok(())
-}
-
 // Helper functions for index management during namespace reseed
 pub async fn remove_all_indexes(db: &SurrealDbClient) -> Result<()> {
    let _ = db;
@@ -46,6 +36,14 @@ pub async fn reset_namespace(db: &SurrealDbClient, namespace: &str, database: &s
    Ok(())
 }

+// Test helper to force index dimension change
+pub async fn change_embedding_length_in_hnsw_indexes(
+    db: &SurrealDbClient,
+    dimension: usize,
+) -> Result<()> {
+    recreate_indexes(db, dimension).await
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/evaluations/src/eval.rs
+++ b/evaluations/src/eval.rs
@@ -0,0 +1,128 @@
+//! Evaluation utilities module - re-exports from focused submodules.
+
+// Re-export types from the root types module
+pub use crate::types::*;
+
+// Re-export from focused modules at crate root (crate-internal only)
+pub(crate) use crate::cases::{cases_from_manifest, SeededCase};
+pub(crate) use crate::namespace::{
+    can_reuse_namespace, connect_eval_db, default_database, default_namespace, ensure_eval_user,
+    record_namespace_state,
+};
+pub(crate) use crate::settings::{enforce_system_settings, load_or_init_system_settings};
+
+use std::path::Path;
+
+use anyhow::{Context, Result};
+use common::storage::db::SurrealDbClient;
+use tokio::io::AsyncWriteExt;
+use tracing::info;
+
+use crate::{
+    args::{self, Config},
+    datasets::ConvertedDataset,
+    slice::{self},
+};
+
+/// Grow the slice ledger to contain the target number of cases.
+pub async fn grow_slice(dataset: &ConvertedDataset, config: &Config) -> Result<()> {
+    let ledger_limit = ledger_target(config);
+    let slice_settings = slice::slice_config_with_limit(config, ledger_limit);
+    let slice =
+        slice::resolve_slice(dataset, &slice_settings).context("resolving dataset slice")?;
+    info!(
+        slice = slice.manifest.slice_id.as_str(),
+        cases = slice.manifest.case_count,
+        positives = slice.manifest.positive_paragraphs,
+        negatives = slice.manifest.negative_paragraphs,
+        total_paragraphs = slice.manifest.total_paragraphs,
+        "Slice ledger ready"
+    );
+    println!(
+        "Slice `{}` now contains {} questions ({} positives, {} negatives)",
+        slice.manifest.slice_id,
+        slice.manifest.case_count,
+        slice.manifest.positive_paragraphs,
+        slice.manifest.negative_paragraphs
+    );
+    Ok(())
+}
+
+pub(crate) fn ledger_target(config: &Config) -> Option<usize> {
+    match (config.slice_grow, config.limit) {
+        (Some(grow), Some(limit)) => Some(limit.max(grow)),
+        (Some(grow), None) => Some(grow),
+        (None, limit) => limit,
+    }
+}
+
+pub(crate) async fn write_chunk_diagnostics(path: &Path, cases: &[CaseDiagnostics]) -> Result<()> {
+    args::ensure_parent(path)?;
+    let mut file = tokio::fs::File::create(path)
+        .await
+        .with_context(|| format!("creating diagnostics file {}", path.display()))?;
+    for case in cases {
+        let line = serde_json::to_vec(case).context("serialising chunk diagnostics entry")?;
+        file.write_all(&line).await?;
+        file.write_all(b"\n").await?;
+    }
+    file.flush().await?;
+    Ok(())
+}
+
+pub(crate) async fn warm_hnsw_cache(db: &SurrealDbClient, dimension: usize) -> Result<()> {
+    // Create a dummy embedding for cache warming
+    let dummy_embedding: Vec<f32> = (0..dimension).map(|i| (i as f32).sin()).collect();
+
+    info!("Warming HNSW caches with sample queries");
+
+    // Warm up chunk embedding index - just query the embedding table to load HNSW index
+    let _ = db
+        .client
+        .query(
+            r#"SELECT chunk_id
+               FROM text_chunk_embedding
+               WHERE embedding <|1,1|> $embedding
+               LIMIT 5"#,
+        )
+        .bind(("embedding", dummy_embedding.clone()))
+        .await
+        .context("warming text chunk HNSW cache")?;
+
+    // Warm up entity embedding index
+    let _ = db
+        .client
+        .query(
+            r#"SELECT entity_id
+               FROM knowledge_entity_embedding
+               WHERE embedding <|1,1|> $embedding
+               LIMIT 5"#,
+        )
+        .bind(("embedding", dummy_embedding))
+        .await
+        .context("warming knowledge entity HNSW cache")?;
+
+    info!("HNSW cache warming completed");
+    Ok(())
+}
+
+use chrono::{DateTime, SecondsFormat, Utc};
+
+pub fn format_timestamp(timestamp: &DateTime<Utc>) -> String {
+    timestamp.to_rfc3339_opts(SecondsFormat::Secs, true)
+}
+
+pub(crate) fn sanitize_model_code(code: &str) -> String {
+    code.chars()
+        .map(|ch| {
+            if ch.is_ascii_alphanumeric() {
+                ch.to_ascii_lowercase()
+            } else {
+                '_'
+            }
+        })
+        .collect()
+}
+
+// Re-export run_evaluation from the pipeline module at crate root
+pub use crate::pipeline::run_evaluation;
--- a/evaluations/src/inspection.rs
+++ b/evaluations/src/inspection.rs
@@ -7,7 +7,7 @@ use std::{
 use anyhow::{anyhow, Context, Result};
 use common::storage::{db::SurrealDbClient, types::text_chunk::TextChunk};

-use crate::{args::Config, eval::connect_eval_db, ingest, snapshot::DbSnapshotState};
+use crate::{args::Config, eval::connect_eval_db, corpus, snapshot::DbSnapshotState};

 pub async fn inspect_question(config: &Config) -> Result<()> {
    let question_id = config
@@ -65,6 +65,7 @@ pub async fn inspect_question(config: &Config) -> Result<()> {
    }

    let db_state_path = config
+        .database
        .inspect_db_state
        .clone()
        .unwrap_or_else(|| default_state_path(config, &manifest));
@@ -109,14 +110,14 @@ struct ChunkEntry {
    snippet: String,
 }

-fn load_manifest(path: &Path) -> Result<ingest::CorpusManifest> {
+fn load_manifest(path: &Path) -> Result<corpus::CorpusManifest> {
    let bytes =
        fs::read(path).with_context(|| format!("reading ingestion manifest {}", path.display()))?;
    serde_json::from_slice(&bytes)
        .with_context(|| format!("parsing ingestion manifest {}", path.display()))
 }

-fn build_chunk_lookup(manifest: &ingest::CorpusManifest) -> HashMap<String, ChunkEntry> {
+fn build_chunk_lookup(manifest: &corpus::CorpusManifest) -> HashMap<String, ChunkEntry> {
    let mut lookup = HashMap::new();
    for paragraph in &manifest.paragraphs {
        for chunk in &paragraph.chunks {
@@ -139,7 +140,7 @@ fn build_chunk_lookup(manifest: &ingest::CorpusManifest) -> HashMap<String, Chun
    lookup
 }

-fn default_state_path(config: &Config, manifest: &ingest::CorpusManifest) -> PathBuf {
+fn default_state_path(config: &Config, manifest: &corpus::CorpusManifest) -> PathBuf {
    config
        .cache_dir
        .join("snapshots")
--- a/evaluations/src/main.rs
+++ b/evaluations/src/main.rs
@@ -1,16 +1,20 @@
 mod args;
 mod cache;
+mod cases;
+mod corpus;
 mod datasets;
 mod db_helpers;
 mod eval;
-mod ingest;
 mod inspection;
+mod namespace;
 mod openai;
 mod perf;
+mod pipeline;
 mod report;
+mod settings;
 mod slice;
-mod slices;
 mod snapshot;
+mod types;

 use anyhow::Context;
 use tokio::runtime::Builder;
--- a/evaluations/src/namespace.rs
+++ b/evaluations/src/namespace.rs
@@ -0,0 +1,224 @@
+//! Database namespace management utilities.
+
+use anyhow::{anyhow, Context, Result};
+use chrono::Utc;
+use common::storage::{db::SurrealDbClient, types::user::User, types::StoredObject};
+use serde::Deserialize;
+use tracing::{info, warn};
+
+use crate::{
+    args::Config,
+    datasets,
+    snapshot::{self, DbSnapshotState},
+};
+
+/// Connect to the evaluation database with fallback auth strategies.
+pub(crate) async fn connect_eval_db(
+    config: &Config,
+    namespace: &str,
+    database: &str,
+) -> Result<SurrealDbClient> {
+    match SurrealDbClient::new(
+        &config.database.db_endpoint,
+        &config.database.db_username,
+        &config.database.db_password,
+        namespace,
+        database,
+    )
+    .await
+    {
+        Ok(client) => {
+            info!(
+                endpoint = %config.database.db_endpoint,
+                namespace,
+                database,
+                auth = "root",
+                "Connected to SurrealDB"
+            );
+            Ok(client)
+        }
+        Err(root_err) => {
+            info!(
+                endpoint = %config.database.db_endpoint,
+                namespace,
+                database,
+                "Root authentication failed; trying namespace-level auth"
+            );
+            let namespace_client = SurrealDbClient::new_with_namespace_user(
+                &config.database.db_endpoint,
+                namespace,
+                &config.database.db_username,
+                &config.database.db_password,
+                database,
+            )
+            .await
+            .map_err(|ns_err| {
+                anyhow!(
+                    "failed to connect to SurrealDB via root ({root_err}) or namespace ({ns_err}) credentials"
+                )
+            })?;
+            info!(
+                endpoint = %config.database.db_endpoint,
+                namespace,
+                database,
+                auth = "namespace",
+                "Connected to SurrealDB"
+            );
+            Ok(namespace_client)
+        }
+    }
+}
+
+/// Check if the namespace contains any corpus data.
+pub(crate) async fn namespace_has_corpus(db: &SurrealDbClient) -> Result<bool> {
+    #[derive(Deserialize)]
+    struct CountRow {
+        count: i64,
+    }
+
+    let mut response = db
+        .client
+        .query("SELECT count() AS count FROM text_chunk")
+        .await
+        .context("checking namespace corpus state")?;
+    let rows: Vec<CountRow> = response.take(0).unwrap_or_default();
+    Ok(rows.first().map(|row| row.count).unwrap_or(0) > 0)
+}
+
+/// Determine if we can reuse an existing namespace based on cached state.
+pub(crate) async fn can_reuse_namespace(
+    db: &SurrealDbClient,
+    descriptor: &snapshot::Descriptor,
+    namespace: &str,
+    database: &str,
+    dataset_id: &str,
+    slice_id: &str,
+    ingestion_fingerprint: &str,
+    slice_case_count: usize,
+) -> Result<bool> {
+    let state = match descriptor.load_db_state().await? {
+        Some(state) => state,
+        None => {
+            info!("No namespace state recorded; reseeding corpus from cached shards");
+            return Ok(false);
+        }
+    };
+
+    if state.slice_case_count != slice_case_count {
+        info!(
+            requested_cases = slice_case_count,
+            stored_cases = state.slice_case_count,
+            "Skipping live namespace reuse; cached state does not match requested window"
+        );
+        return Ok(false);
+    }
+
+    if state.dataset_id != dataset_id
+        || state.slice_id != slice_id
+        || state.ingestion_fingerprint != ingestion_fingerprint
+        || state.namespace.as_deref() != Some(namespace)
+        || state.database.as_deref() != Some(database)
+    {
+        info!(
+            namespace,
+            database, "Cached namespace metadata mismatch; rebuilding corpus from ingestion cache"
+        );
+        return Ok(false);
+    }
+
+    if namespace_has_corpus(db).await? {
+        Ok(true)
+    } else {
+        info!(
+            namespace,
+            database,
+            "Namespace metadata matches but tables are empty; reseeding from ingestion cache"
+        );
+        Ok(false)
+    }
+}
+
+/// Record the current namespace state to allow future reuse checks.
+pub(crate) async fn record_namespace_state(
+    descriptor: &snapshot::Descriptor,
+    dataset_id: &str,
+    slice_id: &str,
+    ingestion_fingerprint: &str,
+    namespace: &str,
+    database: &str,
+    slice_case_count: usize,
+) {
+    let state = DbSnapshotState {
+        dataset_id: dataset_id.to_string(),
+        slice_id: slice_id.to_string(),
+        ingestion_fingerprint: ingestion_fingerprint.to_string(),
+        snapshot_hash: descriptor.metadata_hash().to_string(),
+        updated_at: Utc::now(),
+        namespace: Some(namespace.to_string()),
+        database: Some(database.to_string()),
+        slice_case_count,
+    };
+    if let Err(err) = descriptor.store_db_state(&state).await {
+        warn!(error = %err, "Failed to record namespace state");
+    }
+}
+
+fn sanitize_identifier(input: &str) -> String {
+    let mut cleaned: String = input
+        .chars()
+        .map(|ch| {
+            if ch.is_ascii_alphanumeric() {
+                ch.to_ascii_lowercase()
+            } else {
+                '_'
+            }
+        })
+        .collect();
+    if cleaned.is_empty() {
+        cleaned.push('x');
+    }
+    if cleaned.len() > 64 {
+        cleaned.truncate(64);
+    }
+    cleaned
+}
+
+/// Generate a default namespace name based on dataset and limit.
+pub(crate) fn default_namespace(dataset_id: &str, limit: Option<usize>) -> String {
+    let dataset_component = sanitize_identifier(dataset_id);
+    let limit_component = match limit {
+        Some(value) if value > 0 => format!("limit{}", value),
+        _ => "all".to_string(),
+    };
+    format!("eval_{}_{}", dataset_component, limit_component)
+}
+
+/// Generate the default database name for evaluations.
+pub(crate) fn default_database() -> String {
+    "retrieval_eval".to_string()
+}
+
+/// Ensure the evaluation user exists in the database.
+pub(crate) async fn ensure_eval_user(db: &SurrealDbClient) -> Result<User> {
+    let timestamp = datasets::base_timestamp();
+    let user = User {
+        id: "eval-user".to_string(),
+        created_at: timestamp,
+        updated_at: timestamp,
+        email: "eval-retrieval@minne.dev".to_string(),
+        password: "not-used".to_string(),
+        anonymous: false,
+        api_key: None,
+        admin: false,
+        timezone: "UTC".to_string(),
+    };
+
+    if let Some(existing) = db.get_item::<User>(&user.get_id()).await? {
+        return Ok(existing);
+    }
+
+    db.store_item(user.clone())
+        .await
+        .context("storing evaluation user")?;
+    Ok(user)
+}
--- a/evaluations/src/openai.rs
+++ b/evaluations/src/openai.rs
--- a/evaluations/src/perf.rs
+++ b/evaluations/src/perf.rs
--- a/evaluations/src/pipeline/context.rs
+++ b/evaluations/src/pipeline/context.rs
@@ -22,7 +22,7 @@ use crate::{
    cache::EmbeddingCache,
    datasets::ConvertedDataset,
    eval::{CaseDiagnostics, CaseSummary, EvaluationStageTimings, EvaluationSummary, SeededCase},
-    ingest, slice, snapshot,
+    corpus, slice, snapshot,
 };

 pub(super) struct EvaluationContext<'a> {
@@ -52,7 +52,7 @@ pub(super) struct EvaluationContext<'a> {
    pub namespace_reused: bool,
    pub evaluation_start: Option<Instant>,
    pub eval_user: Option<User>,
-    pub corpus_handle: Option<ingest::CorpusHandle>,
+    pub corpus_handle: Option<corpus::CorpusHandle>,
    pub cases: Vec<SeededCase>,
    pub filtered_questions: usize,
    pub stage_latency_samples: Vec<PipelineStageTimings>,
@@ -145,7 +145,7 @@ impl<'a> EvaluationContext<'a> {
            .clone()
    }

-    pub fn corpus_handle(&self) -> &ingest::CorpusHandle {
+    pub fn corpus_handle(&self) -> &corpus::CorpusHandle {
        self.corpus_handle.as_ref().expect("corpus handle missing")
    }

--- a/evaluations/src/pipeline/mod.rs
+++ b/evaluations/src/pipeline/mod.rs
@@ -4,7 +4,7 @@ mod state;

 use anyhow::Result;

-use crate::{args::Config, datasets::ConvertedDataset, eval::EvaluationSummary};
+use crate::{args::Config, datasets::ConvertedDataset, types::EvaluationSummary};

 use context::EvaluationContext;

--- a/evaluations/src/pipeline/stages/finalize.rs
+++ b/evaluations/src/pipeline/stages/finalize.rs
--- a/evaluations/src/pipeline/stages/mod.rs
+++ b/evaluations/src/pipeline/stages/mod.rs
--- a/evaluations/src/pipeline/stages/prepare_corpus.rs
+++ b/evaluations/src/pipeline/stages/prepare_corpus.rs
@@ -3,7 +3,7 @@ use std::time::Instant;
 use anyhow::Context;
 use tracing::info;

-use crate::{eval::can_reuse_namespace, ingest, slice, snapshot};
+use crate::{eval::can_reuse_namespace, corpus, slice, snapshot};

 use super::super::{
    context::{EvalStage, EvaluationContext},
@@ -23,7 +23,7 @@ pub(crate) async fn prepare_corpus(
    let started = Instant::now();

    let config = ctx.config();
-    let cache_settings = ingest::CorpusCacheConfig::from(config);
+    let cache_settings = corpus::CorpusCacheConfig::from(config);
    let embedding_provider = ctx.embedding_provider().clone();
    let openai_client = ctx.openai_client();
    let slice = ctx.slice();
@@ -31,14 +31,14 @@ pub(crate) async fn prepare_corpus(
        .context("selecting slice window for corpus preparation")?;

    let descriptor = snapshot::Descriptor::new(config, slice, ctx.embedding_provider());
-    let ingestion_config = ingest::make_ingestion_config(config);
-    let expected_fingerprint = ingest::compute_ingestion_fingerprint(
+    let ingestion_config = corpus::make_ingestion_config(config);
+    let expected_fingerprint = corpus::compute_ingestion_fingerprint(
        ctx.dataset(),
        slice,
        config.converted_dataset_path.as_path(),
        &ingestion_config,
    )?;
-    let base_dir = ingest::cached_corpus_dir(
+    let base_dir = corpus::cached_corpus_dir(
        &cache_settings,
        ctx.dataset().metadata.id.as_str(),
        slice.manifest.slice_id.as_str(),
@@ -58,14 +58,14 @@ pub(crate) async fn prepare_corpus(
        )
        .await?
        {
-            if let Some(manifest) = ingest::load_cached_manifest(&base_dir)? {
+            if let Some(manifest) = corpus::load_cached_manifest(&base_dir)? {
                info!(
                    cache = %base_dir.display(),
                    namespace = ctx.namespace.as_str(),
                    database = ctx.database.as_str(),
                    "Namespace already seeded; reusing cached corpus manifest"
                );
-                let corpus_handle = ingest::corpus_handle_from_manifest(manifest, base_dir);
+                let corpus_handle = corpus::corpus_handle_from_manifest(manifest, base_dir);
                ctx.corpus_handle = Some(corpus_handle);
                ctx.expected_fingerprint = Some(expected_fingerprint);
                ctx.ingestion_duration_ms = 0;
@@ -94,7 +94,7 @@ pub(crate) async fn prepare_corpus(
    let eval_user_id = "eval-user".to_string();
    let ingestion_timer = Instant::now();
    let corpus_handle = {
-        ingest::ensure_corpus(
+        corpus::ensure_corpus(
            ctx.dataset(),
            slice,
            &window,
--- a/evaluations/src/pipeline/stages/prepare_db.rs
+++ b/evaluations/src/pipeline/stages/prepare_db.rs
--- a/evaluations/src/pipeline/stages/prepare_namespace.rs
+++ b/evaluations/src/pipeline/stages/prepare_namespace.rs
@@ -10,7 +10,7 @@ use crate::{
        can_reuse_namespace, cases_from_manifest, enforce_system_settings, ensure_eval_user,
        record_namespace_state, warm_hnsw_cache,
    },
-    ingest,
+    corpus,
 };

 use super::super::{
@@ -47,7 +47,7 @@ pub(crate) async fn prepare_namespace(
        if ctx.window_offset == 0 && ctx.window_length >= base_manifest.questions.len() {
            base_manifest.clone()
        } else {
-            ingest::window_manifest(
+            corpus::window_manifest(
                base_manifest,
                ctx.window_offset,
                ctx.window_length,
@@ -116,7 +116,7 @@ pub(crate) async fn prepare_namespace(
        let indexes_disabled = remove_all_indexes(ctx.db()).await.is_ok();

        let seed_start = Instant::now();
-        ingest::seed_manifest_into_db(ctx.db(), &manifest_for_seed)
+        corpus::seed_manifest_into_db(ctx.db(), &manifest_for_seed)
            .await
            .context("seeding ingestion corpus from manifest")?;
        namespace_seed_ms = Some(seed_start.elapsed().as_millis() as u128);
--- a/evaluations/src/pipeline/stages/prepare_slice.rs
+++ b/evaluations/src/pipeline/stages/prepare_slice.rs
@@ -43,11 +43,15 @@ pub(crate) async fn prepare_slice(
    ctx.window_length = window.length;
    ctx.window_total_cases = window.total_cases;

-    ctx.namespace = ctx.config().db_namespace.clone().unwrap_or_else(|| {
-        default_namespace(ctx.dataset().metadata.id.as_str(), ctx.config().limit)
-    });
+    ctx.namespace = ctx
+        .config()
+        .database
+        .db_namespace
+        .clone()
+        .unwrap_or_else(|| default_namespace(ctx.dataset().metadata.id.as_str(), ctx.config().limit));
    ctx.database = ctx
        .config()
+        .database
        .db_database
        .clone()
        .unwrap_or_else(default_database);
--- a/evaluations/src/pipeline/stages/run_queries.rs
+++ b/evaluations/src/pipeline/stages/run_queries.rs
--- a/evaluations/src/pipeline/stages/summarize.rs
+++ b/evaluations/src/pipeline/stages/summarize.rs
@@ -207,10 +207,10 @@ pub(crate) async fn summarize(
        chunk_rrf_fts_weight: active_tuning.chunk_rrf_fts_weight,
        chunk_rrf_use_vector: active_tuning.chunk_rrf_use_vector,
        chunk_rrf_use_fts: active_tuning.chunk_rrf_use_fts,
-        ingest_chunk_min_tokens: config.ingest_chunk_min_tokens,
-        ingest_chunk_max_tokens: config.ingest_chunk_max_tokens,
-        ingest_chunks_only: config.ingest_chunks_only,
-        ingest_chunk_overlap_tokens: config.ingest_chunk_overlap_tokens,
+        ingest_chunk_min_tokens: config.ingest.ingest_chunk_min_tokens,
+        ingest_chunk_max_tokens: config.ingest.ingest_chunk_max_tokens,
+        ingest_chunks_only: config.ingest.ingest_chunks_only,
+        ingest_chunk_overlap_tokens: config.ingest.ingest_chunk_overlap_tokens,
        chunk_vector_take: active_tuning.chunk_vector_take,
        chunk_fts_take: active_tuning.chunk_fts_take,
        chunk_avg_chars_per_token: active_tuning.avg_chars_per_token,
--- a/evaluations/src/pipeline/state.rs
+++ b/evaluations/src/pipeline/state.rs
--- a/evaluations/src/report.rs
+++ b/evaluations/src/report.rs
--- a/evaluations/src/settings.rs
+++ b/evaluations/src/settings.rs
@@ -0,0 +1,63 @@
+//! System settings enforcement for evaluations.
+
+use anyhow::{Context, Result};
+use common::{
+    error::AppError,
+    storage::{db::SurrealDbClient, types::system_settings::SystemSettings},
+};
+use tracing::info;
+
+use crate::args::Config;
+
+/// Enforce evaluation-specific system settings overrides.
+pub(crate) async fn enforce_system_settings(
+    db: &SurrealDbClient,
+    mut settings: SystemSettings,
+    provider_dimension: usize,
+    config: &Config,
+) -> Result<SystemSettings> {
+    let mut updated_settings = settings.clone();
+    let mut needs_settings_update = false;
+
+    if provider_dimension != settings.embedding_dimensions as usize {
+        updated_settings.embedding_dimensions = provider_dimension as u32;
+        needs_settings_update = true;
+    }
+    if let Some(query_override) = config.query_model.as_deref() {
+        if settings.query_model != query_override {
+            info!(
+                model = query_override,
+                "Overriding system query model for this run"
+            );
+            updated_settings.query_model = query_override.to_string();
+            needs_settings_update = true;
+        }
+    }
+    if needs_settings_update {
+        settings = SystemSettings::update(db, updated_settings)
+            .await
+            .context("updating system settings overrides")?;
+    }
+    Ok(settings)
+}
+
+/// Load existing system settings or initialize them via migrations.
+pub(crate) async fn load_or_init_system_settings(
+    db: &SurrealDbClient,
+    _dimension: usize,
+) -> Result<(SystemSettings, bool)> {
+    match SystemSettings::get_current(db).await {
+        Ok(settings) => Ok((settings, false)),
+        Err(AppError::NotFound(_)) => {
+            info!("System settings missing; applying database migrations for namespace");
+            db.apply_migrations()
+                .await
+                .context("applying database migrations after missing system settings")?;
+            let settings = SystemSettings::get_current(db)
+                .await
+                .context("loading system settings after migrations")?;
+            Ok((settings, true))
+        }
+        Err(err) => Err(err).context("loading system settings"),
+    }
+}
--- a/evaluations/src/slice.rs
+++ b/evaluations/src/slice.rs
@@ -1214,3 +1214,30 @@ mod tests {
        Ok(())
    }
 }
+
+// MARK: - Config integration (merged from slice.rs)
+
+use crate::args::Config;
+
+impl<'a> From<&'a Config> for SliceConfig<'a> {
+    fn from(config: &'a Config) -> Self {
+        slice_config_with_limit(config, None)
+    }
+}
+
+pub fn slice_config_with_limit<'a>(
+    config: &'a Config,
+    limit_override: Option<usize>,
+) -> SliceConfig<'a> {
+    SliceConfig {
+        cache_dir: config.cache_dir.as_path(),
+        force_convert: config.force_convert,
+        explicit_slice: config.slice.as_deref(),
+        limit: limit_override.or(config.limit),
+        corpus_limit: config.corpus_limit,
+        slice_seed: config.slice_seed,
+        llm_mode: config.llm_mode,
+        negative_multiplier: config.negative_multiplier,
+        require_verified_chunks: config.retrieval.require_verified_chunks,
+    }
+}
--- a/evaluations/src/snapshot.rs
+++ b/evaluations/src/snapshot.rs
--- a/evaluations/src/types.rs
+++ b/evaluations/src/types.rs