mirror of
https://github.com/perstarkse/minne.git
synced 2026-04-22 08:48:30 +02:00
benchmarks: fin
This commit is contained in:
@@ -1,2 +1,2 @@
|
|||||||
[alias]
|
[alias]
|
||||||
eval = "run -p eval --"
|
eval = "run -p evaluations --"
|
||||||
|
|||||||
@@ -1,560 +0,0 @@
|
|||||||
mod pipeline;
|
|
||||||
mod types;
|
|
||||||
|
|
||||||
pub use pipeline::run_evaluation;
|
|
||||||
pub use types::*;
|
|
||||||
|
|
||||||
use std::{collections::HashMap, path::Path};
|
|
||||||
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
|
||||||
use chrono::{DateTime, SecondsFormat, Utc};
|
|
||||||
use common::{
|
|
||||||
error::AppError,
|
|
||||||
storage::{
|
|
||||||
db::SurrealDbClient,
|
|
||||||
types::{system_settings::SystemSettings, user::User, StoredObject},
|
|
||||||
},
|
|
||||||
};
|
|
||||||
use serde::Deserialize;
|
|
||||||
use tokio::io::AsyncWriteExt;
|
|
||||||
use tracing::{info, warn};
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
args::{self, Config},
|
|
||||||
datasets::{self, ConvertedDataset},
|
|
||||||
ingest,
|
|
||||||
slice::{self},
|
|
||||||
snapshot::{self, DbSnapshotState},
|
|
||||||
};
|
|
||||||
|
|
||||||
/// One retrieval-evaluation case, built from a single manifest question by
/// `cases_from_manifest`.
///
/// `Debug` and `Clone` are derived so cases can be logged and duplicated
/// without hand-written boilerplate; every field is an owned std type.
#[derive(Debug, Clone)]
pub(crate) struct SeededCase {
    /// Copied from the manifest question's `question_id`.
    question_id: String,
    /// Copied from the manifest question's `question_text`.
    question: String,
    /// Copied from the manifest question's `text_content_id`.
    expected_source: String,
    /// Copied from the manifest question's `answers`.
    answers: Vec<String>,
    /// Copied from the manifest question's `paragraph_id`.
    paragraph_id: String,
    /// Title of the paragraph with that id, or `"Untitled"` when the manifest has none.
    paragraph_title: String,
    /// Copied from the manifest question's `matching_chunk_ids`.
    expected_chunk_ids: Vec<String>,
    /// Copied from the manifest question's `is_impossible`.
    is_impossible: bool,
    /// `true` when `expected_chunk_ids` is non-empty.
    has_verified_chunks: bool,
}
|
|
||||||
|
|
||||||
pub(crate) fn cases_from_manifest(manifest: &ingest::CorpusManifest) -> Vec<SeededCase> {
|
|
||||||
let mut title_map = HashMap::new();
|
|
||||||
for paragraph in &manifest.paragraphs {
|
|
||||||
title_map.insert(paragraph.paragraph_id.as_str(), paragraph.title.clone());
|
|
||||||
}
|
|
||||||
|
|
||||||
let include_impossible = manifest.metadata.include_unanswerable;
|
|
||||||
let require_verified_chunks = manifest.metadata.require_verified_chunks;
|
|
||||||
|
|
||||||
manifest
|
|
||||||
.questions
|
|
||||||
.iter()
|
|
||||||
.filter(|question| {
|
|
||||||
should_include_question(question, include_impossible, require_verified_chunks)
|
|
||||||
})
|
|
||||||
.map(|question| {
|
|
||||||
let title = title_map
|
|
||||||
.get(question.paragraph_id.as_str())
|
|
||||||
.cloned()
|
|
||||||
.unwrap_or_else(|| "Untitled".to_string());
|
|
||||||
SeededCase {
|
|
||||||
question_id: question.question_id.clone(),
|
|
||||||
question: question.question_text.clone(),
|
|
||||||
expected_source: question.text_content_id.clone(),
|
|
||||||
answers: question.answers.clone(),
|
|
||||||
paragraph_id: question.paragraph_id.clone(),
|
|
||||||
paragraph_title: title,
|
|
||||||
expected_chunk_ids: question.matching_chunk_ids.clone(),
|
|
||||||
is_impossible: question.is_impossible,
|
|
||||||
has_verified_chunks: !question.matching_chunk_ids.is_empty(),
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn should_include_question(
|
|
||||||
question: &ingest::CorpusQuestion,
|
|
||||||
include_impossible: bool,
|
|
||||||
require_verified_chunks: bool,
|
|
||||||
) -> bool {
|
|
||||||
if !include_impossible && question.is_impossible {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if require_verified_chunks && question.matching_chunk_ids.is_empty() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
true
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Resolves the dataset slice with the ledger limit from `ledger_target`
/// and reports its size.
///
/// The summary goes both to the tracing log and to stdout.
///
/// # Errors
/// Returns an error, with the context "resolving dataset slice", when
/// `slice::resolve_slice` fails.
pub async fn grow_slice(dataset: &ConvertedDataset, config: &Config) -> Result<()> {
    let settings = slice::slice_config_with_limit(config, ledger_target(config));
    let resolved =
        slice::resolve_slice(dataset, &settings).context("resolving dataset slice")?;
    let manifest = &resolved.manifest;

    info!(
        slice = manifest.slice_id.as_str(),
        cases = manifest.case_count,
        positives = manifest.positive_paragraphs,
        negatives = manifest.negative_paragraphs,
        total_paragraphs = manifest.total_paragraphs,
        "Slice ledger ready"
    );
    println!(
        "Slice `{}` now contains {} questions ({} positives, {} negatives)",
        manifest.slice_id,
        manifest.case_count,
        manifest.positive_paragraphs,
        manifest.negative_paragraphs
    );

    Ok(())
}
|
|
||||||
|
|
||||||
pub(crate) fn ledger_target(config: &Config) -> Option<usize> {
|
|
||||||
match (config.slice_grow, config.limit) {
|
|
||||||
(Some(grow), Some(limit)) => Some(limit.max(grow)),
|
|
||||||
(Some(grow), None) => Some(grow),
|
|
||||||
(None, limit) => limit,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) async fn write_chunk_diagnostics(path: &Path, cases: &[CaseDiagnostics]) -> Result<()> {
|
|
||||||
args::ensure_parent(path)?;
|
|
||||||
let mut file = tokio::fs::File::create(path)
|
|
||||||
.await
|
|
||||||
.with_context(|| format!("creating diagnostics file {}", path.display()))?;
|
|
||||||
for case in cases {
|
|
||||||
let line = serde_json::to_vec(case).context("serialising chunk diagnostics entry")?;
|
|
||||||
file.write_all(&line).await?;
|
|
||||||
file.write_all(b"\n").await?;
|
|
||||||
}
|
|
||||||
file.flush().await?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Issues one nearest-neighbour query against each embedding table so that
/// later queries do not pay the first-query cost.
///
/// The probe vector has `dimension` components, `sin(i)` for index `i`. Its
/// values carry no meaning. Query responses are discarded; only a failure to
/// run a query is reported.
///
/// # Errors
/// Returns an error when either query cannot be executed.
pub(crate) async fn warm_hnsw_cache(db: &SurrealDbClient, dimension: usize) -> Result<()> {
    // NOTE(review): `<|1,1|>` is presumably SurrealDB's KNN operator; confirm
    // against the index definitions.
    const CHUNK_WARMUP: &str = r#"SELECT chunk_id
            FROM text_chunk_embedding
            WHERE embedding <|1,1|> $embedding
            LIMIT 5"#;
    const ENTITY_WARMUP: &str = r#"SELECT entity_id
            FROM knowledge_entity_embedding
            WHERE embedding <|1,1|> $embedding
            LIMIT 5"#;

    // Synthetic probe vector used by both warm-up queries.
    let probe: Vec<f32> = (0..dimension).map(|i| (i as f32).sin()).collect();

    info!("Warming HNSW caches with sample queries");

    // Text chunk embedding index.
    drop(
        db.client
            .query(CHUNK_WARMUP)
            .bind(("embedding", probe.clone()))
            .await
            .context("warming text chunk HNSW cache")?,
    );

    // Knowledge entity embedding index.
    drop(
        db.client
            .query(ENTITY_WARMUP)
            .bind(("embedding", probe))
            .await
            .context("warming knowledge entity HNSW cache")?,
    );

    info!("HNSW cache warming completed");
    Ok(())
}
|
|
||||||
|
|
||||||
pub(crate) async fn ensure_eval_user(db: &SurrealDbClient) -> Result<User> {
|
|
||||||
let timestamp = datasets::base_timestamp();
|
|
||||||
let user = User {
|
|
||||||
id: "eval-user".to_string(),
|
|
||||||
created_at: timestamp,
|
|
||||||
updated_at: timestamp,
|
|
||||||
email: "eval-retrieval@minne.dev".to_string(),
|
|
||||||
password: "not-used".to_string(),
|
|
||||||
anonymous: false,
|
|
||||||
api_key: None,
|
|
||||||
admin: false,
|
|
||||||
timezone: "UTC".to_string(),
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(existing) = db.get_item::<User>(&user.get_id()).await? {
|
|
||||||
return Ok(existing);
|
|
||||||
}
|
|
||||||
|
|
||||||
db.store_item(user.clone())
|
|
||||||
.await
|
|
||||||
.context("storing evaluation user")?;
|
|
||||||
Ok(user)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn format_timestamp(timestamp: &DateTime<Utc>) -> String {
|
|
||||||
timestamp.to_rfc3339_opts(SecondsFormat::Secs, true)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Normalises a model code to lowercase ASCII letters, digits and underscores.
///
/// ASCII letters are lowercased and ASCII digits are kept. Every other
/// character, including each non-ASCII character, becomes a single `_`.
pub(crate) fn sanitize_model_code(code: &str) -> String {
    let mut sanitized = String::with_capacity(code.len());
    for ch in code.chars() {
        match ch {
            'a'..='z' | '0'..='9' => sanitized.push(ch),
            'A'..='Z' => sanitized.push(ch.to_ascii_lowercase()),
            _ => sanitized.push('_'),
        }
    }
    sanitized
}
|
|
||||||
|
|
||||||
pub(crate) async fn connect_eval_db(
|
|
||||||
config: &Config,
|
|
||||||
namespace: &str,
|
|
||||||
database: &str,
|
|
||||||
) -> Result<SurrealDbClient> {
|
|
||||||
match SurrealDbClient::new(
|
|
||||||
&config.db_endpoint,
|
|
||||||
&config.db_username,
|
|
||||||
&config.db_password,
|
|
||||||
namespace,
|
|
||||||
database,
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
Ok(client) => {
|
|
||||||
info!(
|
|
||||||
endpoint = %config.db_endpoint,
|
|
||||||
namespace,
|
|
||||||
database,
|
|
||||||
auth = "root",
|
|
||||||
"Connected to SurrealDB"
|
|
||||||
);
|
|
||||||
Ok(client)
|
|
||||||
}
|
|
||||||
Err(root_err) => {
|
|
||||||
info!(
|
|
||||||
endpoint = %config.db_endpoint,
|
|
||||||
namespace,
|
|
||||||
database,
|
|
||||||
"Root authentication failed; trying namespace-level auth"
|
|
||||||
);
|
|
||||||
let namespace_client = SurrealDbClient::new_with_namespace_user(
|
|
||||||
&config.db_endpoint,
|
|
||||||
namespace,
|
|
||||||
&config.db_username,
|
|
||||||
&config.db_password,
|
|
||||||
database,
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
.map_err(|ns_err| {
|
|
||||||
anyhow!(
|
|
||||||
"failed to connect to SurrealDB via root ({root_err}) or namespace ({ns_err}) credentials"
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
info!(
|
|
||||||
endpoint = %config.db_endpoint,
|
|
||||||
namespace,
|
|
||||||
database,
|
|
||||||
auth = "namespace",
|
|
||||||
"Connected to SurrealDB"
|
|
||||||
);
|
|
||||||
Ok(namespace_client)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) async fn namespace_has_corpus(db: &SurrealDbClient) -> Result<bool> {
|
|
||||||
#[derive(Deserialize)]
|
|
||||||
struct CountRow {
|
|
||||||
count: i64,
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut response = db
|
|
||||||
.client
|
|
||||||
.query("SELECT count() AS count FROM text_chunk")
|
|
||||||
.await
|
|
||||||
.context("checking namespace corpus state")?;
|
|
||||||
let rows: Vec<CountRow> = response.take(0).unwrap_or_default();
|
|
||||||
Ok(rows.first().map(|row| row.count).unwrap_or(0) > 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) async fn can_reuse_namespace(
|
|
||||||
db: &SurrealDbClient,
|
|
||||||
descriptor: &snapshot::Descriptor,
|
|
||||||
namespace: &str,
|
|
||||||
database: &str,
|
|
||||||
dataset_id: &str,
|
|
||||||
slice_id: &str,
|
|
||||||
ingestion_fingerprint: &str,
|
|
||||||
slice_case_count: usize,
|
|
||||||
) -> Result<bool> {
|
|
||||||
let state = match descriptor.load_db_state().await? {
|
|
||||||
Some(state) => state,
|
|
||||||
None => {
|
|
||||||
info!("No namespace state recorded; reseeding corpus from cached shards");
|
|
||||||
return Ok(false);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if state.slice_case_count != slice_case_count {
|
|
||||||
info!(
|
|
||||||
requested_cases = slice_case_count,
|
|
||||||
stored_cases = state.slice_case_count,
|
|
||||||
"Skipping live namespace reuse; cached state does not match requested window"
|
|
||||||
);
|
|
||||||
return Ok(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
if state.dataset_id != dataset_id
|
|
||||||
|| state.slice_id != slice_id
|
|
||||||
|| state.ingestion_fingerprint != ingestion_fingerprint
|
|
||||||
|| state.namespace.as_deref() != Some(namespace)
|
|
||||||
|| state.database.as_deref() != Some(database)
|
|
||||||
{
|
|
||||||
info!(
|
|
||||||
namespace,
|
|
||||||
database, "Cached namespace metadata mismatch; rebuilding corpus from ingestion cache"
|
|
||||||
);
|
|
||||||
return Ok(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
if namespace_has_corpus(db).await? {
|
|
||||||
Ok(true)
|
|
||||||
} else {
|
|
||||||
info!(
|
|
||||||
namespace,
|
|
||||||
database,
|
|
||||||
"Namespace metadata matches but tables are empty; reseeding from ingestion cache"
|
|
||||||
);
|
|
||||||
Ok(false)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Turns arbitrary text into an identifier of 1 to 64 characters drawn from
/// lowercase ASCII letters, digits and `_`.
///
/// Every input character maps to exactly one output character: ASCII letters
/// and digits are kept (lowercased), anything else becomes `_`. Empty input
/// yields `"x"`.
fn sanitize_identifier(input: &str) -> String {
    const MAX_LEN: usize = 64;

    // Each char maps to one ASCII byte, so keeping the first MAX_LEN chars
    // equals truncating the mapped string to MAX_LEN bytes.
    let mut ident = String::with_capacity(input.len().min(MAX_LEN));
    for ch in input.chars().take(MAX_LEN) {
        let mapped = if ch.is_ascii_alphanumeric() {
            ch.to_ascii_lowercase()
        } else {
            '_'
        };
        ident.push(mapped);
    }

    if ident.is_empty() {
        ident.push('x');
    }
    ident
}
|
|
||||||
|
|
||||||
pub(crate) fn default_namespace(dataset_id: &str, limit: Option<usize>) -> String {
|
|
||||||
let dataset_component = sanitize_identifier(dataset_id);
|
|
||||||
let limit_component = match limit {
|
|
||||||
Some(value) if value > 0 => format!("limit{}", value),
|
|
||||||
_ => "all".to_string(),
|
|
||||||
};
|
|
||||||
format!("eval_{}_{}", dataset_component, limit_component)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Name of the database used when none is configured explicitly.
pub(crate) fn default_database() -> String {
    String::from("retrieval_eval")
}
|
|
||||||
|
|
||||||
pub(crate) async fn record_namespace_state(
|
|
||||||
descriptor: &snapshot::Descriptor,
|
|
||||||
dataset_id: &str,
|
|
||||||
slice_id: &str,
|
|
||||||
ingestion_fingerprint: &str,
|
|
||||||
namespace: &str,
|
|
||||||
database: &str,
|
|
||||||
slice_case_count: usize,
|
|
||||||
) {
|
|
||||||
let state = DbSnapshotState {
|
|
||||||
dataset_id: dataset_id.to_string(),
|
|
||||||
slice_id: slice_id.to_string(),
|
|
||||||
ingestion_fingerprint: ingestion_fingerprint.to_string(),
|
|
||||||
snapshot_hash: descriptor.metadata_hash().to_string(),
|
|
||||||
updated_at: Utc::now(),
|
|
||||||
namespace: Some(namespace.to_string()),
|
|
||||||
database: Some(database.to_string()),
|
|
||||||
slice_case_count,
|
|
||||||
};
|
|
||||||
if let Err(err) = descriptor.store_db_state(&state).await {
|
|
||||||
warn!(error = %err, "Failed to record namespace state");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) async fn enforce_system_settings(
|
|
||||||
db: &SurrealDbClient,
|
|
||||||
mut settings: SystemSettings,
|
|
||||||
provider_dimension: usize,
|
|
||||||
config: &Config,
|
|
||||||
) -> Result<SystemSettings> {
|
|
||||||
let mut updated_settings = settings.clone();
|
|
||||||
let mut needs_settings_update = false;
|
|
||||||
|
|
||||||
if provider_dimension != settings.embedding_dimensions as usize {
|
|
||||||
updated_settings.embedding_dimensions = provider_dimension as u32;
|
|
||||||
needs_settings_update = true;
|
|
||||||
}
|
|
||||||
if let Some(query_override) = config.query_model.as_deref() {
|
|
||||||
if settings.query_model != query_override {
|
|
||||||
info!(
|
|
||||||
model = query_override,
|
|
||||||
"Overriding system query model for this run"
|
|
||||||
);
|
|
||||||
updated_settings.query_model = query_override.to_string();
|
|
||||||
needs_settings_update = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if needs_settings_update {
|
|
||||||
settings = SystemSettings::update(db, updated_settings)
|
|
||||||
.await
|
|
||||||
.context("updating system settings overrides")?;
|
|
||||||
}
|
|
||||||
Ok(settings)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) async fn load_or_init_system_settings(
|
|
||||||
db: &SurrealDbClient,
|
|
||||||
_dimension: usize,
|
|
||||||
) -> Result<(SystemSettings, bool)> {
|
|
||||||
match SystemSettings::get_current(db).await {
|
|
||||||
Ok(settings) => Ok((settings, false)),
|
|
||||||
Err(AppError::NotFound(_)) => {
|
|
||||||
info!("System settings missing; applying database migrations for namespace");
|
|
||||||
db.apply_migrations()
|
|
||||||
.await
|
|
||||||
.context("applying database migrations after missing system settings")?;
|
|
||||||
let settings = SystemSettings::get_current(db)
|
|
||||||
.await
|
|
||||||
.context("loading system settings after migrations")?;
|
|
||||||
Ok((settings, true))
|
|
||||||
}
|
|
||||||
Err(err) => Err(err).context("loading system settings"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ingest::store::{CorpusParagraph, EmbeddedKnowledgeEntity, EmbeddedTextChunk};
    use crate::ingest::{CorpusManifest, CorpusMetadata, CorpusQuestion, MANIFEST_VERSION};
    use chrono::Utc;
    use common::storage::types::text_content::TextContent;

    /// Builds a paragraph with no entities, relationships or chunks.
    fn paragraph(id: &str, title: &str, body: &str) -> CorpusParagraph {
        CorpusParagraph {
            paragraph_id: id.to_string(),
            title: title.to_string(),
            text_content: TextContent::new(
                body.to_string(),
                None,
                "test".to_string(),
                None,
                None,
                "user".to_string(),
            ),
            entities: Vec::<EmbeddedKnowledgeEntity>::new(),
            relationships: Vec::new(),
            chunks: Vec::<EmbeddedTextChunk>::new(),
        }
    }

    /// Builds a question; an empty `chunk_ids` means "no verified chunks".
    fn question(
        id: &str,
        paragraph_id: &str,
        text_content_id: &str,
        text: &str,
        answers: &[&str],
        is_impossible: bool,
        chunk_ids: &[&str],
    ) -> CorpusQuestion {
        CorpusQuestion {
            question_id: id.to_string(),
            paragraph_id: paragraph_id.to_string(),
            text_content_id: text_content_id.to_string(),
            question_text: text.to_string(),
            answers: answers.iter().map(|answer| answer.to_string()).collect(),
            is_impossible,
            matching_chunk_ids: chunk_ids.iter().map(|chunk| chunk.to_string()).collect(),
        }
    }

    /// Two paragraphs and three questions: one verified (q1), one impossible
    /// (q2), and one answerable but without verified chunks (q3).
    fn sample_manifest() -> CorpusManifest {
        let paragraphs = vec![
            paragraph("p1", "Alpha", "alpha context"),
            paragraph("p2", "Beta", "beta context"),
        ];
        let questions = vec![
            question(
                "q1",
                "p1",
                "tc-alpha",
                "What is Alpha?",
                &["Alpha"],
                false,
                &["chunk-alpha"],
            ),
            question("q2", "p1", "tc-alpha", "Unanswerable?", &[], true, &[]),
            question("q3", "p2", "tc-beta", "Where is Beta?", &["Beta"], false, &[]),
        ];
        let paragraph_count = paragraphs.len();
        let question_count = questions.len();

        CorpusManifest {
            version: MANIFEST_VERSION,
            metadata: CorpusMetadata {
                dataset_id: "ds".to_string(),
                dataset_label: "Dataset".to_string(),
                slice_id: "slice".to_string(),
                include_unanswerable: true,
                require_verified_chunks: true,
                ingestion_fingerprint: "fp".to_string(),
                embedding_backend: "test".to_string(),
                embedding_model: None,
                embedding_dimension: 3,
                converted_checksum: "chk".to_string(),
                generated_at: Utc::now(),
                paragraph_count,
                question_count,
                chunk_min_tokens: 1,
                chunk_max_tokens: 10,
                chunk_only: false,
            },
            paragraphs,
            questions,
        }
    }

    #[test]
    fn cases_respect_mode_filters() {
        // Strict mode: answerable questions with verified chunks only.
        let mut manifest = sample_manifest();
        manifest.metadata.include_unanswerable = false;
        manifest.metadata.require_verified_chunks = true;

        let strict_cases = cases_from_manifest(&manifest);
        assert_eq!(strict_cases.len(), 1);
        assert_eq!(strict_cases[0].question_id, "q1");
        assert_eq!(strict_cases[0].paragraph_title, "Alpha");

        // Permissive mode: every question is kept, in manifest order.
        let mut llm_manifest = manifest.clone();
        llm_manifest.metadata.include_unanswerable = true;
        llm_manifest.metadata.require_verified_chunks = false;

        let llm_cases = cases_from_manifest(&llm_manifest);
        let ids: Vec<_> = llm_cases
            .iter()
            .map(|case| case.question_id.as_str())
            .collect();
        assert_eq!(ids, vec!["q1", "q2", "q3"]);
    }
}
|
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
use crate::slices::SliceConfig as CoreSliceConfig;
|
|
||||||
|
|
||||||
pub use crate::slices::*;
|
|
||||||
|
|
||||||
use crate::args::Config;
|
|
||||||
|
|
||||||
impl<'a> From<&'a Config> for CoreSliceConfig<'a> {
|
|
||||||
fn from(config: &'a Config) -> Self {
|
|
||||||
slice_config_with_limit(config, None)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn slice_config_with_limit<'a>(
|
|
||||||
config: &'a Config,
|
|
||||||
limit_override: Option<usize>,
|
|
||||||
) -> CoreSliceConfig<'a> {
|
|
||||||
CoreSliceConfig {
|
|
||||||
cache_dir: config.cache_dir.as_path(),
|
|
||||||
force_convert: config.force_convert,
|
|
||||||
explicit_slice: config.slice.as_deref(),
|
|
||||||
limit: limit_override.or(config.limit),
|
|
||||||
corpus_limit: config.corpus_limit,
|
|
||||||
slice_seed: config.slice_seed,
|
|
||||||
llm_mode: config.llm_mode,
|
|
||||||
negative_multiplier: config.negative_multiplier,
|
|
||||||
require_verified_chunks: config.retrieval.require_verified_chunks,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "eval"
|
name = "evaluations"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
@@ -15,15 +15,15 @@ fn workspace_root() -> PathBuf {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn default_report_dir() -> PathBuf {
|
fn default_report_dir() -> PathBuf {
|
||||||
workspace_root().join("eval/reports")
|
workspace_root().join("evaluations/reports")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_cache_dir() -> PathBuf {
|
fn default_cache_dir() -> PathBuf {
|
||||||
workspace_root().join("eval/cache")
|
workspace_root().join("evaluations/cache")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_ingestion_cache_dir() -> PathBuf {
|
fn default_ingestion_cache_dir() -> PathBuf {
|
||||||
workspace_root().join("eval/cache/ingested")
|
workspace_root().join("evaluations/cache/ingested")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const DEFAULT_SLICE_SEED: u64 = 0x5eed_2025;
|
pub const DEFAULT_SLICE_SEED: u64 = 0x5eed_2025;
|
||||||
@@ -135,6 +135,72 @@ impl Default for RetrievalSettings {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Args)]
|
||||||
|
pub struct IngestConfig {
|
||||||
|
/// Directory for ingestion corpora caches
|
||||||
|
#[arg(long, default_value_os_t = default_ingestion_cache_dir())]
|
||||||
|
pub ingestion_cache_dir: PathBuf,
|
||||||
|
|
||||||
|
/// Minimum tokens per chunk for ingestion
|
||||||
|
#[arg(long, default_value_t = 256)]
|
||||||
|
pub ingest_chunk_min_tokens: usize,
|
||||||
|
|
||||||
|
/// Maximum tokens per chunk for ingestion
|
||||||
|
#[arg(long, default_value_t = 512)]
|
||||||
|
pub ingest_chunk_max_tokens: usize,
|
||||||
|
|
||||||
|
/// Overlap between chunks during ingestion (tokens)
|
||||||
|
#[arg(long, default_value_t = 50)]
|
||||||
|
pub ingest_chunk_overlap_tokens: usize,
|
||||||
|
|
||||||
|
/// Run ingestion in chunk-only mode (skip analyzer/graph generation)
|
||||||
|
#[arg(long)]
|
||||||
|
pub ingest_chunks_only: bool,
|
||||||
|
|
||||||
|
/// Number of paragraphs to ingest concurrently
|
||||||
|
#[arg(long, default_value_t = 10)]
|
||||||
|
pub ingestion_batch_size: usize,
|
||||||
|
|
||||||
|
/// Maximum retries for ingestion failures per paragraph
|
||||||
|
#[arg(long, default_value_t = 3)]
|
||||||
|
pub ingestion_max_retries: usize,
|
||||||
|
|
||||||
|
/// Recompute embeddings for cached corpora without re-running ingestion
|
||||||
|
#[arg(long, alias = "refresh-embeddings")]
|
||||||
|
pub refresh_embeddings_only: bool,
|
||||||
|
|
||||||
|
/// Delete cached paragraph shards before rebuilding the ingestion corpus
|
||||||
|
#[arg(long)]
|
||||||
|
pub slice_reset_ingestion: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Args)]
|
||||||
|
pub struct DatabaseArgs {
|
||||||
|
/// SurrealDB server endpoint
|
||||||
|
#[arg(long, default_value = "ws://127.0.0.1:8000", env = "EVAL_DB_ENDPOINT")]
|
||||||
|
pub db_endpoint: String,
|
||||||
|
|
||||||
|
/// SurrealDB root username
|
||||||
|
#[arg(long, default_value = "root_user", env = "EVAL_DB_USERNAME")]
|
||||||
|
pub db_username: String,
|
||||||
|
|
||||||
|
/// SurrealDB root password
|
||||||
|
#[arg(long, default_value = "root_password", env = "EVAL_DB_PASSWORD")]
|
||||||
|
pub db_password: String,
|
||||||
|
|
||||||
|
/// Override the namespace used on the SurrealDB server
|
||||||
|
#[arg(long, env = "EVAL_DB_NAMESPACE")]
|
||||||
|
pub db_namespace: Option<String>,
|
||||||
|
|
||||||
|
/// Override the database used on the SurrealDB server
|
||||||
|
#[arg(long, env = "EVAL_DB_DATABASE")]
|
||||||
|
pub db_database: Option<String>,
|
||||||
|
|
||||||
|
/// Path to inspect DB state
|
||||||
|
#[arg(long)]
|
||||||
|
pub inspect_db_state: Option<PathBuf>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Parser, Debug, Clone)]
|
#[derive(Parser, Debug, Clone)]
|
||||||
#[command(author, version, about, long_about = None)]
|
#[command(author, version, about, long_about = None)]
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
@@ -205,37 +271,8 @@ pub struct Config {
|
|||||||
#[arg(long, default_value_os_t = default_cache_dir())]
|
#[arg(long, default_value_os_t = default_cache_dir())]
|
||||||
pub cache_dir: PathBuf,
|
pub cache_dir: PathBuf,
|
||||||
|
|
||||||
/// Directory for ingestion corpora caches
|
#[command(flatten)]
|
||||||
#[arg(long, default_value_os_t = default_ingestion_cache_dir())]
|
pub ingest: IngestConfig,
|
||||||
pub ingestion_cache_dir: PathBuf,
|
|
||||||
|
|
||||||
/// Minimum tokens per chunk for ingestion
|
|
||||||
#[arg(long, default_value_t = 256)]
|
|
||||||
pub ingest_chunk_min_tokens: usize,
|
|
||||||
|
|
||||||
/// Maximum tokens per chunk for ingestion
|
|
||||||
#[arg(long, default_value_t = 512)]
|
|
||||||
pub ingest_chunk_max_tokens: usize,
|
|
||||||
|
|
||||||
/// Overlap between chunks during ingestion (tokens)
|
|
||||||
#[arg(long, default_value_t = 50)]
|
|
||||||
pub ingest_chunk_overlap_tokens: usize,
|
|
||||||
|
|
||||||
/// Run ingestion in chunk-only mode (skip analyzer/graph generation)
|
|
||||||
#[arg(long)]
|
|
||||||
pub ingest_chunks_only: bool,
|
|
||||||
|
|
||||||
/// Number of paragraphs to ingest concurrently
|
|
||||||
#[arg(long, default_value_t = 10)]
|
|
||||||
pub ingestion_batch_size: usize,
|
|
||||||
|
|
||||||
/// Maximum retries for ingestion failures per paragraph
|
|
||||||
#[arg(long, default_value_t = 3)]
|
|
||||||
pub ingestion_max_retries: usize,
|
|
||||||
|
|
||||||
/// Recompute embeddings for cached corpora without re-running ingestion
|
|
||||||
#[arg(long, alias = "refresh-embeddings")]
|
|
||||||
pub refresh_embeddings_only: bool,
|
|
||||||
|
|
||||||
/// Include entity descriptions and categories in JSON reports
|
/// Include entity descriptions and categories in JSON reports
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
@@ -261,12 +298,8 @@ pub struct Config {
|
|||||||
#[arg(long, default_value_t = 0)]
|
#[arg(long, default_value_t = 0)]
|
||||||
pub slice_offset: usize,
|
pub slice_offset: usize,
|
||||||
|
|
||||||
/// Delete cached paragraph shards before rebuilding the ingestion corpus
|
|
||||||
#[arg(long)]
|
|
||||||
pub slice_reset_ingestion: bool,
|
|
||||||
|
|
||||||
/// Target negative-to-positive paragraph ratio for slice growth
|
/// Target negative-to-positive paragraph ratio for slice growth
|
||||||
#[arg(long, default_value_t = crate::slices::DEFAULT_NEGATIVE_MULTIPLIER)]
|
#[arg(long, default_value_t = crate::slice::DEFAULT_NEGATIVE_MULTIPLIER)]
|
||||||
pub negative_multiplier: f32,
|
pub negative_multiplier: f32,
|
||||||
|
|
||||||
/// Annotate the run; label is stored in JSON/Markdown reports
|
/// Annotate the run; label is stored in JSON/Markdown reports
|
||||||
@@ -301,29 +334,8 @@ pub struct Config {
|
|||||||
#[arg(long, alias = "perf-log")]
|
#[arg(long, alias = "perf-log")]
|
||||||
pub perf_log_console: bool,
|
pub perf_log_console: bool,
|
||||||
|
|
||||||
/// SurrealDB server endpoint
|
#[command(flatten)]
|
||||||
#[arg(long, default_value = "ws://127.0.0.1:8000", env = "EVAL_DB_ENDPOINT")]
|
pub database: DatabaseArgs,
|
||||||
pub db_endpoint: String,
|
|
||||||
|
|
||||||
/// SurrealDB root username
|
|
||||||
#[arg(long, default_value = "root_user", env = "EVAL_DB_USERNAME")]
|
|
||||||
pub db_username: String,
|
|
||||||
|
|
||||||
/// SurrealDB root password
|
|
||||||
#[arg(long, default_value = "root_password", env = "EVAL_DB_PASSWORD")]
|
|
||||||
pub db_password: String,
|
|
||||||
|
|
||||||
/// Override the namespace used on the SurrealDB server
|
|
||||||
#[arg(long, env = "EVAL_DB_NAMESPACE")]
|
|
||||||
pub db_namespace: Option<String>,
|
|
||||||
|
|
||||||
/// Override the database used on the SurrealDB server
|
|
||||||
#[arg(long, env = "EVAL_DB_DATABASE")]
|
|
||||||
pub db_database: Option<String>,
|
|
||||||
|
|
||||||
/// Path to inspect DB state
|
|
||||||
#[arg(long)]
|
|
||||||
pub inspect_db_state: Option<PathBuf>,
|
|
||||||
|
|
||||||
// Computed fields (not arguments)
|
// Computed fields (not arguments)
|
||||||
#[arg(skip)]
|
#[arg(skip)]
|
||||||
@@ -377,21 +389,21 @@ impl Config {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Validations
|
// Validations
|
||||||
if self.ingest_chunk_min_tokens == 0
|
if self.ingest.ingest_chunk_min_tokens == 0
|
||||||
|| self.ingest_chunk_min_tokens >= self.ingest_chunk_max_tokens
|
|| self.ingest.ingest_chunk_min_tokens >= self.ingest.ingest_chunk_max_tokens
|
||||||
{
|
{
|
||||||
return Err(anyhow!(
|
return Err(anyhow!(
|
||||||
"--ingest-chunk-min-tokens must be greater than zero and less than --ingest-chunk-max-tokens (got {} >= {})",
|
"--ingest-chunk-min-tokens must be greater than zero and less than --ingest-chunk-max-tokens (got {} >= {})",
|
||||||
self.ingest_chunk_min_tokens,
|
self.ingest.ingest_chunk_min_tokens,
|
||||||
self.ingest_chunk_max_tokens
|
self.ingest.ingest_chunk_max_tokens
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.ingest_chunk_overlap_tokens >= self.ingest_chunk_min_tokens {
|
if self.ingest.ingest_chunk_overlap_tokens >= self.ingest.ingest_chunk_min_tokens {
|
||||||
return Err(anyhow!(
|
return Err(anyhow!(
|
||||||
"--ingest-chunk-overlap-tokens ({}) must be less than --ingest-chunk-min-tokens ({})",
|
"--ingest-chunk-overlap-tokens ({}) must be less than --ingest-chunk-min-tokens ({})",
|
||||||
self.ingest_chunk_overlap_tokens,
|
self.ingest.ingest_chunk_overlap_tokens,
|
||||||
self.ingest_chunk_min_tokens
|
self.ingest.ingest_chunk_min_tokens
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
187
evaluations/src/cases.rs
Normal file
187
evaluations/src/cases.rs
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
//! Case generation from corpus manifests.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use crate::corpus;
|
||||||
|
|
||||||
|
/// A test case for retrieval evaluation derived from a manifest question.
///
/// `Debug` and `Clone` are derived so cases can be logged and duplicated
/// without hand-written boilerplate; every field is an owned std type.
#[derive(Debug, Clone)]
pub(crate) struct SeededCase {
    /// Copied from the manifest question's `question_id`.
    pub question_id: String,
    /// Copied from the manifest question's `question_text`.
    pub question: String,
    /// Copied from the manifest question's `text_content_id`.
    pub expected_source: String,
    /// Copied from the manifest question's `answers`.
    pub answers: Vec<String>,
    /// Copied from the manifest question's `paragraph_id`.
    pub paragraph_id: String,
    /// Title of the paragraph with that id, or `"Untitled"` when the manifest has none.
    pub paragraph_title: String,
    /// Copied from the manifest question's `matching_chunk_ids`.
    pub expected_chunk_ids: Vec<String>,
    /// Copied from the manifest question's `is_impossible`.
    pub is_impossible: bool,
    /// `true` when `expected_chunk_ids` is non-empty.
    pub has_verified_chunks: bool,
}
|
||||||
|
|
||||||
|
/// Convert a corpus manifest into seeded evaluation cases.
|
||||||
|
pub(crate) fn cases_from_manifest(manifest: &corpus::CorpusManifest) -> Vec<SeededCase> {
|
||||||
|
let mut title_map = HashMap::new();
|
||||||
|
for paragraph in &manifest.paragraphs {
|
||||||
|
title_map.insert(paragraph.paragraph_id.as_str(), paragraph.title.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
let include_impossible = manifest.metadata.include_unanswerable;
|
||||||
|
let require_verified_chunks = manifest.metadata.require_verified_chunks;
|
||||||
|
|
||||||
|
manifest
|
||||||
|
.questions
|
||||||
|
.iter()
|
||||||
|
.filter(|question| {
|
||||||
|
should_include_question(question, include_impossible, require_verified_chunks)
|
||||||
|
})
|
||||||
|
.map(|question| {
|
||||||
|
let title = title_map
|
||||||
|
.get(question.paragraph_id.as_str())
|
||||||
|
.cloned()
|
||||||
|
.unwrap_or_else(|| "Untitled".to_string());
|
||||||
|
SeededCase {
|
||||||
|
question_id: question.question_id.clone(),
|
||||||
|
question: question.question_text.clone(),
|
||||||
|
expected_source: question.text_content_id.clone(),
|
||||||
|
answers: question.answers.clone(),
|
||||||
|
paragraph_id: question.paragraph_id.clone(),
|
||||||
|
paragraph_title: title,
|
||||||
|
expected_chunk_ids: question.matching_chunk_ids.clone(),
|
||||||
|
is_impossible: question.is_impossible,
|
||||||
|
has_verified_chunks: !question.matching_chunk_ids.is_empty(),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn should_include_question(
|
||||||
|
question: &corpus::CorpusQuestion,
|
||||||
|
include_impossible: bool,
|
||||||
|
require_verified_chunks: bool,
|
||||||
|
) -> bool {
|
||||||
|
if !include_impossible && question.is_impossible {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if require_verified_chunks && question.matching_chunk_ids.is_empty() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::corpus::store::{CorpusParagraph, EmbeddedKnowledgeEntity, EmbeddedTextChunk};
    use crate::corpus::{CorpusManifest, CorpusMetadata, CorpusQuestion, MANIFEST_VERSION};
    use chrono::Utc;
    use common::storage::types::text_content::TextContent;

    /// Paragraph fixture that carries text but no entities, relationships or chunks.
    fn bare_paragraph(id: &str, title: &str, text: &str) -> CorpusParagraph {
        CorpusParagraph {
            paragraph_id: id.to_string(),
            title: title.to_string(),
            text_content: TextContent::new(
                text.to_string(),
                None,
                "test".to_string(),
                None,
                None,
                "user".to_string(),
            ),
            entities: Vec::<EmbeddedKnowledgeEntity>::new(),
            relationships: Vec::new(),
            chunks: Vec::<EmbeddedTextChunk>::new(),
        }
    }

    /// Manifest with two paragraphs and three questions:
    /// q1 is answerable with a matching chunk, q2 is unanswerable, q3 is answerable
    /// but has no matching chunk.
    fn sample_manifest() -> CorpusManifest {
        let paragraphs = vec![
            bare_paragraph("p1", "Alpha", "alpha context"),
            bare_paragraph("p2", "Beta", "beta context"),
        ];
        let questions = vec![
            CorpusQuestion {
                question_id: "q1".to_string(),
                paragraph_id: "p1".to_string(),
                text_content_id: "tc-alpha".to_string(),
                question_text: "What is Alpha?".to_string(),
                answers: vec!["Alpha".to_string()],
                is_impossible: false,
                matching_chunk_ids: vec!["chunk-alpha".to_string()],
            },
            CorpusQuestion {
                question_id: "q2".to_string(),
                paragraph_id: "p1".to_string(),
                text_content_id: "tc-alpha".to_string(),
                question_text: "Unanswerable?".to_string(),
                answers: Vec::new(),
                is_impossible: true,
                matching_chunk_ids: Vec::new(),
            },
            CorpusQuestion {
                question_id: "q3".to_string(),
                paragraph_id: "p2".to_string(),
                text_content_id: "tc-beta".to_string(),
                question_text: "Where is Beta?".to_string(),
                answers: vec!["Beta".to_string()],
                is_impossible: false,
                matching_chunk_ids: Vec::new(),
            },
        ];

        // Take the counts before the vectors are moved into the manifest.
        let paragraph_count = paragraphs.len();
        let question_count = questions.len();

        CorpusManifest {
            version: MANIFEST_VERSION,
            metadata: CorpusMetadata {
                dataset_id: "ds".to_string(),
                dataset_label: "Dataset".to_string(),
                slice_id: "slice".to_string(),
                include_unanswerable: true,
                require_verified_chunks: true,
                ingestion_fingerprint: "fp".to_string(),
                embedding_backend: "test".to_string(),
                embedding_model: None,
                embedding_dimension: 3,
                converted_checksum: "chk".to_string(),
                generated_at: Utc::now(),
                paragraph_count,
                question_count,
                chunk_min_tokens: 1,
                chunk_max_tokens: 10,
                chunk_only: false,
            },
            paragraphs,
            questions,
        }
    }

    #[test]
    fn cases_respect_mode_filters() {
        // Strict mode: unanswerable questions excluded, verified chunks required.
        let mut manifest = sample_manifest();
        manifest.metadata.include_unanswerable = false;
        manifest.metadata.require_verified_chunks = true;

        let strict_cases = cases_from_manifest(&manifest);
        assert_eq!(strict_cases.len(), 1);
        let only_case = &strict_cases[0];
        assert_eq!(only_case.question_id, "q1");
        assert_eq!(only_case.paragraph_title, "Alpha");

        // Permissive mode: both filters relaxed, so every question is kept in order.
        let mut llm_manifest = manifest.clone();
        llm_manifest.metadata.include_unanswerable = true;
        llm_manifest.metadata.require_verified_chunks = false;

        let llm_cases = cases_from_manifest(&llm_manifest);
        let mut ids = Vec::new();
        for case in &llm_cases {
            ids.push(case.question_id.as_str());
        }
        assert_eq!(ids, vec!["q1", "q2", "q3"]);
    }
}
|
||||||
@@ -32,11 +32,11 @@ impl CorpusCacheConfig {
|
|||||||
impl From<&Config> for CorpusCacheConfig {
|
impl From<&Config> for CorpusCacheConfig {
|
||||||
fn from(config: &Config) -> Self {
|
fn from(config: &Config) -> Self {
|
||||||
CorpusCacheConfig::new(
|
CorpusCacheConfig::new(
|
||||||
config.ingestion_cache_dir.clone(),
|
config.ingest.ingestion_cache_dir.clone(),
|
||||||
config.force_convert || config.slice_reset_ingestion,
|
config.force_convert || config.ingest.slice_reset_ingestion,
|
||||||
config.refresh_embeddings_only,
|
config.ingest.refresh_embeddings_only,
|
||||||
config.ingestion_batch_size,
|
config.ingest.ingestion_batch_size,
|
||||||
config.ingestion_max_retries,
|
config.ingest.ingestion_max_retries,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -15,12 +15,12 @@ pub use store::{
|
|||||||
|
|
||||||
pub fn make_ingestion_config(config: &crate::args::Config) -> ingestion_pipeline::IngestionConfig {
|
pub fn make_ingestion_config(config: &crate::args::Config) -> ingestion_pipeline::IngestionConfig {
|
||||||
let mut tuning = ingestion_pipeline::IngestionTuning::default();
|
let mut tuning = ingestion_pipeline::IngestionTuning::default();
|
||||||
tuning.chunk_min_tokens = config.ingest_chunk_min_tokens;
|
tuning.chunk_min_tokens = config.ingest.ingest_chunk_min_tokens;
|
||||||
tuning.chunk_max_tokens = config.ingest_chunk_max_tokens;
|
tuning.chunk_max_tokens = config.ingest.ingest_chunk_max_tokens;
|
||||||
tuning.chunk_overlap_tokens = config.ingest_chunk_overlap_tokens;
|
tuning.chunk_overlap_tokens = config.ingest.ingest_chunk_overlap_tokens;
|
||||||
|
|
||||||
ingestion_pipeline::IngestionConfig {
|
ingestion_pipeline::IngestionConfig {
|
||||||
tuning,
|
tuning,
|
||||||
chunk_only: config.ingest_chunks_only,
|
chunk_only: config.ingest.ingest_chunks_only,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -26,10 +26,10 @@ use uuid::Uuid;
|
|||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
datasets::{ConvertedDataset, ConvertedParagraph, ConvertedQuestion},
|
datasets::{ConvertedDataset, ConvertedParagraph, ConvertedQuestion},
|
||||||
slices::{self, ResolvedSlice, SliceParagraphKind},
|
slice::{self, ResolvedSlice, SliceParagraphKind},
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::ingest::{
|
use crate::corpus::{
|
||||||
CorpusCacheConfig, CorpusHandle, CorpusManifest, CorpusMetadata, CorpusQuestion,
|
CorpusCacheConfig, CorpusHandle, CorpusManifest, CorpusMetadata, CorpusQuestion,
|
||||||
EmbeddedKnowledgeEntity, EmbeddedTextChunk, ParagraphShard, ParagraphShardStore,
|
EmbeddedKnowledgeEntity, EmbeddedTextChunk, ParagraphShard, ParagraphShardStore,
|
||||||
MANIFEST_VERSION,
|
MANIFEST_VERSION,
|
||||||
@@ -58,12 +58,12 @@ impl<'a> IngestRequest<'a> {
|
|||||||
fn from_entry(
|
fn from_entry(
|
||||||
slot: usize,
|
slot: usize,
|
||||||
paragraph: &'a ConvertedParagraph,
|
paragraph: &'a ConvertedParagraph,
|
||||||
entry: &'a slices::SliceParagraphEntry,
|
entry: &'a slice::SliceParagraphEntry,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let shard_path = entry
|
let shard_path = entry
|
||||||
.shard_path
|
.shard_path
|
||||||
.clone()
|
.clone()
|
||||||
.unwrap_or_else(|| slices::default_shard_path(&entry.id));
|
.unwrap_or_else(|| slice::default_shard_path(&entry.id));
|
||||||
let question_refs = match &entry.kind {
|
let question_refs = match &entry.kind {
|
||||||
SliceParagraphKind::Positive { question_ids } => question_ids
|
SliceParagraphKind::Positive { question_ids } => question_ids
|
||||||
.iter()
|
.iter()
|
||||||
@@ -94,7 +94,7 @@ impl<'a> IngestRequest<'a> {
|
|||||||
|
|
||||||
struct ParagraphPlan<'a> {
|
struct ParagraphPlan<'a> {
|
||||||
slot: usize,
|
slot: usize,
|
||||||
entry: &'a slices::SliceParagraphEntry,
|
entry: &'a slice::SliceParagraphEntry,
|
||||||
paragraph: &'a ConvertedParagraph,
|
paragraph: &'a ConvertedParagraph,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,7 +109,7 @@ struct IngestionStats {
|
|||||||
pub async fn ensure_corpus(
|
pub async fn ensure_corpus(
|
||||||
dataset: &ConvertedDataset,
|
dataset: &ConvertedDataset,
|
||||||
slice: &ResolvedSlice<'_>,
|
slice: &ResolvedSlice<'_>,
|
||||||
window: &slices::SliceWindow<'_>,
|
window: &slice::SliceWindow<'_>,
|
||||||
cache: &CorpusCacheConfig,
|
cache: &CorpusCacheConfig,
|
||||||
embedding: Arc<common::utils::embedding::EmbeddingProvider>,
|
embedding: Arc<common::utils::embedding::EmbeddingProvider>,
|
||||||
openai: Arc<OpenAIClient>,
|
openai: Arc<OpenAIClient>,
|
||||||
@@ -189,7 +189,7 @@ pub async fn ensure_corpus(
|
|||||||
.entry
|
.entry
|
||||||
.shard_path
|
.shard_path
|
||||||
.clone()
|
.clone()
|
||||||
.unwrap_or_else(|| slices::default_shard_path(&plan_entry.entry.id));
|
.unwrap_or_else(|| slice::default_shard_path(&plan_entry.entry.id));
|
||||||
let shard = if cache.force_refresh {
|
let shard = if cache.force_refresh {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
@@ -683,7 +683,7 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
use crate::{
|
use crate::{
|
||||||
datasets::{ConvertedDataset, ConvertedParagraph, ConvertedQuestion, DatasetKind},
|
datasets::{ConvertedDataset, ConvertedParagraph, ConvertedQuestion, DatasetKind},
|
||||||
slices::{CaseRef, SliceCaseEntry, SliceManifest, SliceParagraphEntry, SliceParagraphKind},
|
slice::{CaseRef, SliceCaseEntry, SliceManifest, SliceParagraphEntry, SliceParagraphKind},
|
||||||
};
|
};
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
|
|
||||||
@@ -2,16 +2,6 @@ use anyhow::{Context, Result};
|
|||||||
use common::storage::{db::SurrealDbClient, indexes::ensure_runtime_indexes};
|
use common::storage::{db::SurrealDbClient, indexes::ensure_runtime_indexes};
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
// Remove and recreate HNSW indexes for changing embedding lengths, used at beginning if embedding length differs from default system settings.
|
|
||||||
pub async fn change_embedding_length_in_hnsw_indexes(
|
|
||||||
db: &SurrealDbClient,
|
|
||||||
dimension: usize,
|
|
||||||
) -> Result<()> {
|
|
||||||
// No-op for now; runtime indexes are created after ingestion with the correct dimension.
|
|
||||||
let _ = (db, dimension);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper functions for index management during namespace reseed
|
// Helper functions for index management during namespace reseed
|
||||||
pub async fn remove_all_indexes(db: &SurrealDbClient) -> Result<()> {
|
pub async fn remove_all_indexes(db: &SurrealDbClient) -> Result<()> {
|
||||||
let _ = db;
|
let _ = db;
|
||||||
@@ -46,6 +36,14 @@ pub async fn reset_namespace(db: &SurrealDbClient, namespace: &str, database: &s
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test helper to force index dimension change
|
||||||
|
pub async fn change_embedding_length_in_hnsw_indexes(
|
||||||
|
db: &SurrealDbClient,
|
||||||
|
dimension: usize,
|
||||||
|
) -> Result<()> {
|
||||||
|
recreate_indexes(db, dimension).await
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
128
evaluations/src/eval.rs
Normal file
128
evaluations/src/eval.rs
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
//! Evaluation utilities module - re-exports from focused submodules.
|
||||||
|
|
||||||
|
// Re-export types from the root types module
|
||||||
|
pub use crate::types::*;
|
||||||
|
|
||||||
|
// Re-export from focused modules at crate root (crate-internal only)
|
||||||
|
pub(crate) use crate::cases::{cases_from_manifest, SeededCase};
|
||||||
|
pub(crate) use crate::namespace::{
|
||||||
|
can_reuse_namespace, connect_eval_db, default_database, default_namespace, ensure_eval_user,
|
||||||
|
record_namespace_state,
|
||||||
|
};
|
||||||
|
pub(crate) use crate::settings::{enforce_system_settings, load_or_init_system_settings};
|
||||||
|
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use common::storage::db::SurrealDbClient;
|
||||||
|
use tokio::io::AsyncWriteExt;
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
args::{self, Config},
|
||||||
|
datasets::ConvertedDataset,
|
||||||
|
slice::{self},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Grow the slice ledger to contain the target number of cases.
|
||||||
|
pub async fn grow_slice(dataset: &ConvertedDataset, config: &Config) -> Result<()> {
|
||||||
|
let ledger_limit = ledger_target(config);
|
||||||
|
let slice_settings = slice::slice_config_with_limit(config, ledger_limit);
|
||||||
|
let slice =
|
||||||
|
slice::resolve_slice(dataset, &slice_settings).context("resolving dataset slice")?;
|
||||||
|
info!(
|
||||||
|
slice = slice.manifest.slice_id.as_str(),
|
||||||
|
cases = slice.manifest.case_count,
|
||||||
|
positives = slice.manifest.positive_paragraphs,
|
||||||
|
negatives = slice.manifest.negative_paragraphs,
|
||||||
|
total_paragraphs = slice.manifest.total_paragraphs,
|
||||||
|
"Slice ledger ready"
|
||||||
|
);
|
||||||
|
println!(
|
||||||
|
"Slice `{}` now contains {} questions ({} positives, {} negatives)",
|
||||||
|
slice.manifest.slice_id,
|
||||||
|
slice.manifest.case_count,
|
||||||
|
slice.manifest.positive_paragraphs,
|
||||||
|
slice.manifest.negative_paragraphs
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn ledger_target(config: &Config) -> Option<usize> {
|
||||||
|
match (config.slice_grow, config.limit) {
|
||||||
|
(Some(grow), Some(limit)) => Some(limit.max(grow)),
|
||||||
|
(Some(grow), None) => Some(grow),
|
||||||
|
(None, limit) => limit,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn write_chunk_diagnostics(path: &Path, cases: &[CaseDiagnostics]) -> Result<()> {
|
||||||
|
args::ensure_parent(path)?;
|
||||||
|
let mut file = tokio::fs::File::create(path)
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("creating diagnostics file {}", path.display()))?;
|
||||||
|
for case in cases {
|
||||||
|
let line = serde_json::to_vec(case).context("serialising chunk diagnostics entry")?;
|
||||||
|
file.write_all(&line).await?;
|
||||||
|
file.write_all(b"\n").await?;
|
||||||
|
}
|
||||||
|
file.flush().await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn warm_hnsw_cache(db: &SurrealDbClient, dimension: usize) -> Result<()> {
|
||||||
|
// Create a dummy embedding for cache warming
|
||||||
|
let dummy_embedding: Vec<f32> = (0..dimension).map(|i| (i as f32).sin()).collect();
|
||||||
|
|
||||||
|
info!("Warming HNSW caches with sample queries");
|
||||||
|
|
||||||
|
// Warm up chunk embedding index - just query the embedding table to load HNSW index
|
||||||
|
let _ = db
|
||||||
|
.client
|
||||||
|
.query(
|
||||||
|
r#"SELECT chunk_id
|
||||||
|
FROM text_chunk_embedding
|
||||||
|
WHERE embedding <|1,1|> $embedding
|
||||||
|
LIMIT 5"#,
|
||||||
|
)
|
||||||
|
.bind(("embedding", dummy_embedding.clone()))
|
||||||
|
.await
|
||||||
|
.context("warming text chunk HNSW cache")?;
|
||||||
|
|
||||||
|
// Warm up entity embedding index
|
||||||
|
let _ = db
|
||||||
|
.client
|
||||||
|
.query(
|
||||||
|
r#"SELECT entity_id
|
||||||
|
FROM knowledge_entity_embedding
|
||||||
|
WHERE embedding <|1,1|> $embedding
|
||||||
|
LIMIT 5"#,
|
||||||
|
)
|
||||||
|
.bind(("embedding", dummy_embedding))
|
||||||
|
.await
|
||||||
|
.context("warming knowledge entity HNSW cache")?;
|
||||||
|
|
||||||
|
info!("HNSW cache warming completed");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
use chrono::{DateTime, SecondsFormat, Utc};
|
||||||
|
|
||||||
|
pub fn format_timestamp(timestamp: &DateTime<Utc>) -> String {
|
||||||
|
timestamp.to_rfc3339_opts(SecondsFormat::Secs, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Normalise a model code to lowercase ASCII letters, digits and underscores.
///
/// ASCII alphanumerics are lowercased; every other character (including each
/// non-ASCII character) becomes a single `_`. The character count is preserved.
pub(crate) fn sanitize_model_code(code: &str) -> String {
    let mut sanitized = String::with_capacity(code.len());
    for ch in code.chars() {
        sanitized.push(match ch {
            c if c.is_ascii_alphanumeric() => c.to_ascii_lowercase(),
            _ => '_',
        });
    }
    sanitized
}
|
||||||
|
|
||||||
|
// Re-export run_evaluation from the pipeline module at crate root
|
||||||
|
pub use crate::pipeline::run_evaluation;
|
||||||
@@ -7,7 +7,7 @@ use std::{
|
|||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use common::storage::{db::SurrealDbClient, types::text_chunk::TextChunk};
|
use common::storage::{db::SurrealDbClient, types::text_chunk::TextChunk};
|
||||||
|
|
||||||
use crate::{args::Config, eval::connect_eval_db, ingest, snapshot::DbSnapshotState};
|
use crate::{args::Config, eval::connect_eval_db, corpus, snapshot::DbSnapshotState};
|
||||||
|
|
||||||
pub async fn inspect_question(config: &Config) -> Result<()> {
|
pub async fn inspect_question(config: &Config) -> Result<()> {
|
||||||
let question_id = config
|
let question_id = config
|
||||||
@@ -65,6 +65,7 @@ pub async fn inspect_question(config: &Config) -> Result<()> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let db_state_path = config
|
let db_state_path = config
|
||||||
|
.database
|
||||||
.inspect_db_state
|
.inspect_db_state
|
||||||
.clone()
|
.clone()
|
||||||
.unwrap_or_else(|| default_state_path(config, &manifest));
|
.unwrap_or_else(|| default_state_path(config, &manifest));
|
||||||
@@ -109,14 +110,14 @@ struct ChunkEntry {
|
|||||||
snippet: String,
|
snippet: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load_manifest(path: &Path) -> Result<ingest::CorpusManifest> {
|
fn load_manifest(path: &Path) -> Result<corpus::CorpusManifest> {
|
||||||
let bytes =
|
let bytes =
|
||||||
fs::read(path).with_context(|| format!("reading ingestion manifest {}", path.display()))?;
|
fs::read(path).with_context(|| format!("reading ingestion manifest {}", path.display()))?;
|
||||||
serde_json::from_slice(&bytes)
|
serde_json::from_slice(&bytes)
|
||||||
.with_context(|| format!("parsing ingestion manifest {}", path.display()))
|
.with_context(|| format!("parsing ingestion manifest {}", path.display()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_chunk_lookup(manifest: &ingest::CorpusManifest) -> HashMap<String, ChunkEntry> {
|
fn build_chunk_lookup(manifest: &corpus::CorpusManifest) -> HashMap<String, ChunkEntry> {
|
||||||
let mut lookup = HashMap::new();
|
let mut lookup = HashMap::new();
|
||||||
for paragraph in &manifest.paragraphs {
|
for paragraph in &manifest.paragraphs {
|
||||||
for chunk in ¶graph.chunks {
|
for chunk in ¶graph.chunks {
|
||||||
@@ -139,7 +140,7 @@ fn build_chunk_lookup(manifest: &ingest::CorpusManifest) -> HashMap<String, Chun
|
|||||||
lookup
|
lookup
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_state_path(config: &Config, manifest: &ingest::CorpusManifest) -> PathBuf {
|
fn default_state_path(config: &Config, manifest: &corpus::CorpusManifest) -> PathBuf {
|
||||||
config
|
config
|
||||||
.cache_dir
|
.cache_dir
|
||||||
.join("snapshots")
|
.join("snapshots")
|
||||||
@@ -1,16 +1,20 @@
|
|||||||
mod args;
|
mod args;
|
||||||
mod cache;
|
mod cache;
|
||||||
|
mod cases;
|
||||||
|
mod corpus;
|
||||||
mod datasets;
|
mod datasets;
|
||||||
mod db_helpers;
|
mod db_helpers;
|
||||||
mod eval;
|
mod eval;
|
||||||
mod ingest;
|
|
||||||
mod inspection;
|
mod inspection;
|
||||||
|
mod namespace;
|
||||||
mod openai;
|
mod openai;
|
||||||
mod perf;
|
mod perf;
|
||||||
|
mod pipeline;
|
||||||
mod report;
|
mod report;
|
||||||
|
mod settings;
|
||||||
mod slice;
|
mod slice;
|
||||||
mod slices;
|
|
||||||
mod snapshot;
|
mod snapshot;
|
||||||
|
mod types;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use tokio::runtime::Builder;
|
use tokio::runtime::Builder;
|
||||||
224
evaluations/src/namespace.rs
Normal file
224
evaluations/src/namespace.rs
Normal file
@@ -0,0 +1,224 @@
|
|||||||
|
//! Database namespace management utilities.
|
||||||
|
|
||||||
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
use chrono::Utc;
|
||||||
|
use common::storage::{db::SurrealDbClient, types::user::User, types::StoredObject};
|
||||||
|
use serde::Deserialize;
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
args::Config,
|
||||||
|
datasets,
|
||||||
|
snapshot::{self, DbSnapshotState},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Connect to the evaluation database with fallback auth strategies.
|
||||||
|
pub(crate) async fn connect_eval_db(
|
||||||
|
config: &Config,
|
||||||
|
namespace: &str,
|
||||||
|
database: &str,
|
||||||
|
) -> Result<SurrealDbClient> {
|
||||||
|
match SurrealDbClient::new(
|
||||||
|
&config.database.db_endpoint,
|
||||||
|
&config.database.db_username,
|
||||||
|
&config.database.db_password,
|
||||||
|
namespace,
|
||||||
|
database,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(client) => {
|
||||||
|
info!(
|
||||||
|
endpoint = %config.database.db_endpoint,
|
||||||
|
namespace,
|
||||||
|
database,
|
||||||
|
auth = "root",
|
||||||
|
"Connected to SurrealDB"
|
||||||
|
);
|
||||||
|
Ok(client)
|
||||||
|
}
|
||||||
|
Err(root_err) => {
|
||||||
|
info!(
|
||||||
|
endpoint = %config.database.db_endpoint,
|
||||||
|
namespace,
|
||||||
|
database,
|
||||||
|
"Root authentication failed; trying namespace-level auth"
|
||||||
|
);
|
||||||
|
let namespace_client = SurrealDbClient::new_with_namespace_user(
|
||||||
|
&config.database.db_endpoint,
|
||||||
|
namespace,
|
||||||
|
&config.database.db_username,
|
||||||
|
&config.database.db_password,
|
||||||
|
database,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.map_err(|ns_err| {
|
||||||
|
anyhow!(
|
||||||
|
"failed to connect to SurrealDB via root ({root_err}) or namespace ({ns_err}) credentials"
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
info!(
|
||||||
|
endpoint = %config.database.db_endpoint,
|
||||||
|
namespace,
|
||||||
|
database,
|
||||||
|
auth = "namespace",
|
||||||
|
"Connected to SurrealDB"
|
||||||
|
);
|
||||||
|
Ok(namespace_client)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the namespace contains any corpus data.
|
||||||
|
pub(crate) async fn namespace_has_corpus(db: &SurrealDbClient) -> Result<bool> {
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct CountRow {
|
||||||
|
count: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut response = db
|
||||||
|
.client
|
||||||
|
.query("SELECT count() AS count FROM text_chunk")
|
||||||
|
.await
|
||||||
|
.context("checking namespace corpus state")?;
|
||||||
|
let rows: Vec<CountRow> = response.take(0).unwrap_or_default();
|
||||||
|
Ok(rows.first().map(|row| row.count).unwrap_or(0) > 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Determine if we can reuse an existing namespace based on cached state.
|
||||||
|
pub(crate) async fn can_reuse_namespace(
|
||||||
|
db: &SurrealDbClient,
|
||||||
|
descriptor: &snapshot::Descriptor,
|
||||||
|
namespace: &str,
|
||||||
|
database: &str,
|
||||||
|
dataset_id: &str,
|
||||||
|
slice_id: &str,
|
||||||
|
ingestion_fingerprint: &str,
|
||||||
|
slice_case_count: usize,
|
||||||
|
) -> Result<bool> {
|
||||||
|
let state = match descriptor.load_db_state().await? {
|
||||||
|
Some(state) => state,
|
||||||
|
None => {
|
||||||
|
info!("No namespace state recorded; reseeding corpus from cached shards");
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if state.slice_case_count != slice_case_count {
|
||||||
|
info!(
|
||||||
|
requested_cases = slice_case_count,
|
||||||
|
stored_cases = state.slice_case_count,
|
||||||
|
"Skipping live namespace reuse; cached state does not match requested window"
|
||||||
|
);
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if state.dataset_id != dataset_id
|
||||||
|
|| state.slice_id != slice_id
|
||||||
|
|| state.ingestion_fingerprint != ingestion_fingerprint
|
||||||
|
|| state.namespace.as_deref() != Some(namespace)
|
||||||
|
|| state.database.as_deref() != Some(database)
|
||||||
|
{
|
||||||
|
info!(
|
||||||
|
namespace,
|
||||||
|
database, "Cached namespace metadata mismatch; rebuilding corpus from ingestion cache"
|
||||||
|
);
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if namespace_has_corpus(db).await? {
|
||||||
|
Ok(true)
|
||||||
|
} else {
|
||||||
|
info!(
|
||||||
|
namespace,
|
||||||
|
database,
|
||||||
|
"Namespace metadata matches but tables are empty; reseeding from ingestion cache"
|
||||||
|
);
|
||||||
|
Ok(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record the current namespace state to allow future reuse checks.
|
||||||
|
pub(crate) async fn record_namespace_state(
|
||||||
|
descriptor: &snapshot::Descriptor,
|
||||||
|
dataset_id: &str,
|
||||||
|
slice_id: &str,
|
||||||
|
ingestion_fingerprint: &str,
|
||||||
|
namespace: &str,
|
||||||
|
database: &str,
|
||||||
|
slice_case_count: usize,
|
||||||
|
) {
|
||||||
|
let state = DbSnapshotState {
|
||||||
|
dataset_id: dataset_id.to_string(),
|
||||||
|
slice_id: slice_id.to_string(),
|
||||||
|
ingestion_fingerprint: ingestion_fingerprint.to_string(),
|
||||||
|
snapshot_hash: descriptor.metadata_hash().to_string(),
|
||||||
|
updated_at: Utc::now(),
|
||||||
|
namespace: Some(namespace.to_string()),
|
||||||
|
database: Some(database.to_string()),
|
||||||
|
slice_case_count,
|
||||||
|
};
|
||||||
|
if let Err(err) = descriptor.store_db_state(&state).await {
|
||||||
|
warn!(error = %err, "Failed to record namespace state");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reduce arbitrary text to an identifier made of lowercase ASCII letters, digits
/// and underscores.
///
/// ASCII alphanumerics are lowercased and every other character becomes `_`. The
/// result is capped at 64 characters, and an empty input yields `"x"`.
fn sanitize_identifier(input: &str) -> String {
    // Each input character maps to exactly one ASCII character, so capping the
    // input at 64 characters caps the output at 64 bytes.
    let cleaned: String = input
        .chars()
        .take(64)
        .map(|ch| match ch {
            c if c.is_ascii_alphanumeric() => c.to_ascii_lowercase(),
            _ => '_',
        })
        .collect();

    if cleaned.is_empty() {
        "x".to_string()
    } else {
        cleaned
    }
}
|
||||||
|
|
||||||
|
/// Generate a default namespace name based on dataset and limit.
|
||||||
|
pub(crate) fn default_namespace(dataset_id: &str, limit: Option<usize>) -> String {
|
||||||
|
let dataset_component = sanitize_identifier(dataset_id);
|
||||||
|
let limit_component = match limit {
|
||||||
|
Some(value) if value > 0 => format!("limit{}", value),
|
||||||
|
_ => "all".to_string(),
|
||||||
|
};
|
||||||
|
format!("eval_{}_{}", dataset_component, limit_component)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Name of the database used for evaluations when none is configured.
pub(crate) fn default_database() -> String {
    String::from("retrieval_eval")
}
|
||||||
|
|
||||||
|
/// Ensure the evaluation user exists in the database.
|
||||||
|
pub(crate) async fn ensure_eval_user(db: &SurrealDbClient) -> Result<User> {
|
||||||
|
let timestamp = datasets::base_timestamp();
|
||||||
|
let user = User {
|
||||||
|
id: "eval-user".to_string(),
|
||||||
|
created_at: timestamp,
|
||||||
|
updated_at: timestamp,
|
||||||
|
email: "eval-retrieval@minne.dev".to_string(),
|
||||||
|
password: "not-used".to_string(),
|
||||||
|
anonymous: false,
|
||||||
|
api_key: None,
|
||||||
|
admin: false,
|
||||||
|
timezone: "UTC".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(existing) = db.get_item::<User>(&user.get_id()).await? {
|
||||||
|
return Ok(existing);
|
||||||
|
}
|
||||||
|
|
||||||
|
db.store_item(user.clone())
|
||||||
|
.await
|
||||||
|
.context("storing evaluation user")?;
|
||||||
|
Ok(user)
|
||||||
|
}
|
||||||
@@ -22,7 +22,7 @@ use crate::{
|
|||||||
cache::EmbeddingCache,
|
cache::EmbeddingCache,
|
||||||
datasets::ConvertedDataset,
|
datasets::ConvertedDataset,
|
||||||
eval::{CaseDiagnostics, CaseSummary, EvaluationStageTimings, EvaluationSummary, SeededCase},
|
eval::{CaseDiagnostics, CaseSummary, EvaluationStageTimings, EvaluationSummary, SeededCase},
|
||||||
ingest, slice, snapshot,
|
corpus, slice, snapshot,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub(super) struct EvaluationContext<'a> {
|
pub(super) struct EvaluationContext<'a> {
|
||||||
@@ -52,7 +52,7 @@ pub(super) struct EvaluationContext<'a> {
|
|||||||
pub namespace_reused: bool,
|
pub namespace_reused: bool,
|
||||||
pub evaluation_start: Option<Instant>,
|
pub evaluation_start: Option<Instant>,
|
||||||
pub eval_user: Option<User>,
|
pub eval_user: Option<User>,
|
||||||
pub corpus_handle: Option<ingest::CorpusHandle>,
|
pub corpus_handle: Option<corpus::CorpusHandle>,
|
||||||
pub cases: Vec<SeededCase>,
|
pub cases: Vec<SeededCase>,
|
||||||
pub filtered_questions: usize,
|
pub filtered_questions: usize,
|
||||||
pub stage_latency_samples: Vec<PipelineStageTimings>,
|
pub stage_latency_samples: Vec<PipelineStageTimings>,
|
||||||
@@ -145,7 +145,7 @@ impl<'a> EvaluationContext<'a> {
|
|||||||
.clone()
|
.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn corpus_handle(&self) -> &ingest::CorpusHandle {
|
pub fn corpus_handle(&self) -> &corpus::CorpusHandle {
|
||||||
self.corpus_handle.as_ref().expect("corpus handle missing")
|
self.corpus_handle.as_ref().expect("corpus handle missing")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4,7 +4,7 @@ mod state;
|
|||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
|
||||||
use crate::{args::Config, datasets::ConvertedDataset, eval::EvaluationSummary};
|
use crate::{args::Config, datasets::ConvertedDataset, types::EvaluationSummary};
|
||||||
|
|
||||||
use context::EvaluationContext;
|
use context::EvaluationContext;
|
||||||
|
|
||||||
@@ -3,7 +3,7 @@ use std::time::Instant;
|
|||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
use crate::{eval::can_reuse_namespace, ingest, slice, snapshot};
|
use crate::{eval::can_reuse_namespace, corpus, slice, snapshot};
|
||||||
|
|
||||||
use super::super::{
|
use super::super::{
|
||||||
context::{EvalStage, EvaluationContext},
|
context::{EvalStage, EvaluationContext},
|
||||||
@@ -23,7 +23,7 @@ pub(crate) async fn prepare_corpus(
|
|||||||
let started = Instant::now();
|
let started = Instant::now();
|
||||||
|
|
||||||
let config = ctx.config();
|
let config = ctx.config();
|
||||||
let cache_settings = ingest::CorpusCacheConfig::from(config);
|
let cache_settings = corpus::CorpusCacheConfig::from(config);
|
||||||
let embedding_provider = ctx.embedding_provider().clone();
|
let embedding_provider = ctx.embedding_provider().clone();
|
||||||
let openai_client = ctx.openai_client();
|
let openai_client = ctx.openai_client();
|
||||||
let slice = ctx.slice();
|
let slice = ctx.slice();
|
||||||
@@ -31,14 +31,14 @@ pub(crate) async fn prepare_corpus(
|
|||||||
.context("selecting slice window for corpus preparation")?;
|
.context("selecting slice window for corpus preparation")?;
|
||||||
|
|
||||||
let descriptor = snapshot::Descriptor::new(config, slice, ctx.embedding_provider());
|
let descriptor = snapshot::Descriptor::new(config, slice, ctx.embedding_provider());
|
||||||
let ingestion_config = ingest::make_ingestion_config(config);
|
let ingestion_config = corpus::make_ingestion_config(config);
|
||||||
let expected_fingerprint = ingest::compute_ingestion_fingerprint(
|
let expected_fingerprint = corpus::compute_ingestion_fingerprint(
|
||||||
ctx.dataset(),
|
ctx.dataset(),
|
||||||
slice,
|
slice,
|
||||||
config.converted_dataset_path.as_path(),
|
config.converted_dataset_path.as_path(),
|
||||||
&ingestion_config,
|
&ingestion_config,
|
||||||
)?;
|
)?;
|
||||||
let base_dir = ingest::cached_corpus_dir(
|
let base_dir = corpus::cached_corpus_dir(
|
||||||
&cache_settings,
|
&cache_settings,
|
||||||
ctx.dataset().metadata.id.as_str(),
|
ctx.dataset().metadata.id.as_str(),
|
||||||
slice.manifest.slice_id.as_str(),
|
slice.manifest.slice_id.as_str(),
|
||||||
@@ -58,14 +58,14 @@ pub(crate) async fn prepare_corpus(
|
|||||||
)
|
)
|
||||||
.await?
|
.await?
|
||||||
{
|
{
|
||||||
if let Some(manifest) = ingest::load_cached_manifest(&base_dir)? {
|
if let Some(manifest) = corpus::load_cached_manifest(&base_dir)? {
|
||||||
info!(
|
info!(
|
||||||
cache = %base_dir.display(),
|
cache = %base_dir.display(),
|
||||||
namespace = ctx.namespace.as_str(),
|
namespace = ctx.namespace.as_str(),
|
||||||
database = ctx.database.as_str(),
|
database = ctx.database.as_str(),
|
||||||
"Namespace already seeded; reusing cached corpus manifest"
|
"Namespace already seeded; reusing cached corpus manifest"
|
||||||
);
|
);
|
||||||
let corpus_handle = ingest::corpus_handle_from_manifest(manifest, base_dir);
|
let corpus_handle = corpus::corpus_handle_from_manifest(manifest, base_dir);
|
||||||
ctx.corpus_handle = Some(corpus_handle);
|
ctx.corpus_handle = Some(corpus_handle);
|
||||||
ctx.expected_fingerprint = Some(expected_fingerprint);
|
ctx.expected_fingerprint = Some(expected_fingerprint);
|
||||||
ctx.ingestion_duration_ms = 0;
|
ctx.ingestion_duration_ms = 0;
|
||||||
@@ -94,7 +94,7 @@ pub(crate) async fn prepare_corpus(
|
|||||||
let eval_user_id = "eval-user".to_string();
|
let eval_user_id = "eval-user".to_string();
|
||||||
let ingestion_timer = Instant::now();
|
let ingestion_timer = Instant::now();
|
||||||
let corpus_handle = {
|
let corpus_handle = {
|
||||||
ingest::ensure_corpus(
|
corpus::ensure_corpus(
|
||||||
ctx.dataset(),
|
ctx.dataset(),
|
||||||
slice,
|
slice,
|
||||||
&window,
|
&window,
|
||||||
@@ -10,7 +10,7 @@ use crate::{
|
|||||||
can_reuse_namespace, cases_from_manifest, enforce_system_settings, ensure_eval_user,
|
can_reuse_namespace, cases_from_manifest, enforce_system_settings, ensure_eval_user,
|
||||||
record_namespace_state, warm_hnsw_cache,
|
record_namespace_state, warm_hnsw_cache,
|
||||||
},
|
},
|
||||||
ingest,
|
corpus,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::super::{
|
use super::super::{
|
||||||
@@ -47,7 +47,7 @@ pub(crate) async fn prepare_namespace(
|
|||||||
if ctx.window_offset == 0 && ctx.window_length >= base_manifest.questions.len() {
|
if ctx.window_offset == 0 && ctx.window_length >= base_manifest.questions.len() {
|
||||||
base_manifest.clone()
|
base_manifest.clone()
|
||||||
} else {
|
} else {
|
||||||
ingest::window_manifest(
|
corpus::window_manifest(
|
||||||
base_manifest,
|
base_manifest,
|
||||||
ctx.window_offset,
|
ctx.window_offset,
|
||||||
ctx.window_length,
|
ctx.window_length,
|
||||||
@@ -116,7 +116,7 @@ pub(crate) async fn prepare_namespace(
|
|||||||
let indexes_disabled = remove_all_indexes(ctx.db()).await.is_ok();
|
let indexes_disabled = remove_all_indexes(ctx.db()).await.is_ok();
|
||||||
|
|
||||||
let seed_start = Instant::now();
|
let seed_start = Instant::now();
|
||||||
ingest::seed_manifest_into_db(ctx.db(), &manifest_for_seed)
|
corpus::seed_manifest_into_db(ctx.db(), &manifest_for_seed)
|
||||||
.await
|
.await
|
||||||
.context("seeding ingestion corpus from manifest")?;
|
.context("seeding ingestion corpus from manifest")?;
|
||||||
namespace_seed_ms = Some(seed_start.elapsed().as_millis() as u128);
|
namespace_seed_ms = Some(seed_start.elapsed().as_millis() as u128);
|
||||||
@@ -43,11 +43,15 @@ pub(crate) async fn prepare_slice(
|
|||||||
ctx.window_length = window.length;
|
ctx.window_length = window.length;
|
||||||
ctx.window_total_cases = window.total_cases;
|
ctx.window_total_cases = window.total_cases;
|
||||||
|
|
||||||
ctx.namespace = ctx.config().db_namespace.clone().unwrap_or_else(|| {
|
ctx.namespace = ctx
|
||||||
default_namespace(ctx.dataset().metadata.id.as_str(), ctx.config().limit)
|
.config()
|
||||||
});
|
.database
|
||||||
|
.db_namespace
|
||||||
|
.clone()
|
||||||
|
.unwrap_or_else(|| default_namespace(ctx.dataset().metadata.id.as_str(), ctx.config().limit));
|
||||||
ctx.database = ctx
|
ctx.database = ctx
|
||||||
.config()
|
.config()
|
||||||
|
.database
|
||||||
.db_database
|
.db_database
|
||||||
.clone()
|
.clone()
|
||||||
.unwrap_or_else(default_database);
|
.unwrap_or_else(default_database);
|
||||||
@@ -207,10 +207,10 @@ pub(crate) async fn summarize(
|
|||||||
chunk_rrf_fts_weight: active_tuning.chunk_rrf_fts_weight,
|
chunk_rrf_fts_weight: active_tuning.chunk_rrf_fts_weight,
|
||||||
chunk_rrf_use_vector: active_tuning.chunk_rrf_use_vector,
|
chunk_rrf_use_vector: active_tuning.chunk_rrf_use_vector,
|
||||||
chunk_rrf_use_fts: active_tuning.chunk_rrf_use_fts,
|
chunk_rrf_use_fts: active_tuning.chunk_rrf_use_fts,
|
||||||
ingest_chunk_min_tokens: config.ingest_chunk_min_tokens,
|
ingest_chunk_min_tokens: config.ingest.ingest_chunk_min_tokens,
|
||||||
ingest_chunk_max_tokens: config.ingest_chunk_max_tokens,
|
ingest_chunk_max_tokens: config.ingest.ingest_chunk_max_tokens,
|
||||||
ingest_chunks_only: config.ingest_chunks_only,
|
ingest_chunks_only: config.ingest.ingest_chunks_only,
|
||||||
ingest_chunk_overlap_tokens: config.ingest_chunk_overlap_tokens,
|
ingest_chunk_overlap_tokens: config.ingest.ingest_chunk_overlap_tokens,
|
||||||
chunk_vector_take: active_tuning.chunk_vector_take,
|
chunk_vector_take: active_tuning.chunk_vector_take,
|
||||||
chunk_fts_take: active_tuning.chunk_fts_take,
|
chunk_fts_take: active_tuning.chunk_fts_take,
|
||||||
chunk_avg_chars_per_token: active_tuning.avg_chars_per_token,
|
chunk_avg_chars_per_token: active_tuning.avg_chars_per_token,
|
||||||
63
evaluations/src/settings.rs
Normal file
63
evaluations/src/settings.rs
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
//! System settings enforcement for evaluations.
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use common::{
|
||||||
|
error::AppError,
|
||||||
|
storage::{db::SurrealDbClient, types::system_settings::SystemSettings},
|
||||||
|
};
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
use crate::args::Config;
|
||||||
|
|
||||||
|
/// Enforce evaluation-specific system settings overrides.
|
||||||
|
pub(crate) async fn enforce_system_settings(
|
||||||
|
db: &SurrealDbClient,
|
||||||
|
mut settings: SystemSettings,
|
||||||
|
provider_dimension: usize,
|
||||||
|
config: &Config,
|
||||||
|
) -> Result<SystemSettings> {
|
||||||
|
let mut updated_settings = settings.clone();
|
||||||
|
let mut needs_settings_update = false;
|
||||||
|
|
||||||
|
if provider_dimension != settings.embedding_dimensions as usize {
|
||||||
|
updated_settings.embedding_dimensions = provider_dimension as u32;
|
||||||
|
needs_settings_update = true;
|
||||||
|
}
|
||||||
|
if let Some(query_override) = config.query_model.as_deref() {
|
||||||
|
if settings.query_model != query_override {
|
||||||
|
info!(
|
||||||
|
model = query_override,
|
||||||
|
"Overriding system query model for this run"
|
||||||
|
);
|
||||||
|
updated_settings.query_model = query_override.to_string();
|
||||||
|
needs_settings_update = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if needs_settings_update {
|
||||||
|
settings = SystemSettings::update(db, updated_settings)
|
||||||
|
.await
|
||||||
|
.context("updating system settings overrides")?;
|
||||||
|
}
|
||||||
|
Ok(settings)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load existing system settings or initialize them via migrations.
|
||||||
|
pub(crate) async fn load_or_init_system_settings(
|
||||||
|
db: &SurrealDbClient,
|
||||||
|
_dimension: usize,
|
||||||
|
) -> Result<(SystemSettings, bool)> {
|
||||||
|
match SystemSettings::get_current(db).await {
|
||||||
|
Ok(settings) => Ok((settings, false)),
|
||||||
|
Err(AppError::NotFound(_)) => {
|
||||||
|
info!("System settings missing; applying database migrations for namespace");
|
||||||
|
db.apply_migrations()
|
||||||
|
.await
|
||||||
|
.context("applying database migrations after missing system settings")?;
|
||||||
|
let settings = SystemSettings::get_current(db)
|
||||||
|
.await
|
||||||
|
.context("loading system settings after migrations")?;
|
||||||
|
Ok((settings, true))
|
||||||
|
}
|
||||||
|
Err(err) => Err(err).context("loading system settings"),
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1214,3 +1214,30 @@ mod tests {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MARK: - Config integration (merged from slice.rs)
|
||||||
|
|
||||||
|
use crate::args::Config;
|
||||||
|
|
||||||
|
impl<'a> From<&'a Config> for SliceConfig<'a> {
|
||||||
|
fn from(config: &'a Config) -> Self {
|
||||||
|
slice_config_with_limit(config, None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn slice_config_with_limit<'a>(
|
||||||
|
config: &'a Config,
|
||||||
|
limit_override: Option<usize>,
|
||||||
|
) -> SliceConfig<'a> {
|
||||||
|
SliceConfig {
|
||||||
|
cache_dir: config.cache_dir.as_path(),
|
||||||
|
force_convert: config.force_convert,
|
||||||
|
explicit_slice: config.slice.as_deref(),
|
||||||
|
limit: limit_override.or(config.limit),
|
||||||
|
corpus_limit: config.corpus_limit,
|
||||||
|
slice_seed: config.slice_seed,
|
||||||
|
llm_mode: config.llm_mode,
|
||||||
|
negative_multiplier: config.negative_multiplier,
|
||||||
|
require_verified_chunks: config.retrieval.require_verified_chunks,
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user