mirror of
https://github.com/perstarkse/minne.git
synced 2026-03-14 06:16:19 +01:00
639 lines
26 KiB
Rust
639 lines
26 KiB
Rust
use std::{
|
||
env,
|
||
path::{Path, PathBuf},
|
||
};
|
||
|
||
use anyhow::{anyhow, Context, Result};
|
||
|
||
use crate::datasets::DatasetKind;
|
||
|
||
/// Default value of [`Config::slice_seed`]; no CLI flag in this file changes it.
pub const DEFAULT_SLICE_SEED: u64 = 0x5eed_2025;
|
||
|
||
/// Embedding backend selected for an evaluation run.
///
/// Chosen on the command line with `--embedding <name>`; when the flag is
/// absent the default variant, [`EmbeddingBackend::FastEmbed`], is used.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum EmbeddingBackend {
    /// Selected by the name `hashed`.
    Hashed,
    /// Selected by `fastembed`, `fast-embed` or `fast`; this is the default.
    #[default]
    FastEmbed,
}
|
||
|
||
impl std::str::FromStr for EmbeddingBackend {
|
||
type Err = anyhow::Error;
|
||
|
||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||
match s.to_ascii_lowercase().as_str() {
|
||
"hashed" => Ok(Self::Hashed),
|
||
"fastembed" | "fast-embed" | "fast" => Ok(Self::FastEmbed),
|
||
other => Err(anyhow!(
|
||
"unknown embedding backend '{other}'. Expected 'hashed' or 'fastembed'."
|
||
)),
|
||
}
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Clone)]
|
||
pub struct Config {
|
||
pub convert_only: bool,
|
||
pub force_convert: bool,
|
||
pub dataset: DatasetKind,
|
||
pub llm_mode: bool,
|
||
pub corpus_limit: Option<usize>,
|
||
pub raw_dataset_path: PathBuf,
|
||
pub converted_dataset_path: PathBuf,
|
||
pub report_dir: PathBuf,
|
||
pub k: usize,
|
||
pub limit: Option<usize>,
|
||
pub summary_sample: usize,
|
||
pub full_context: bool,
|
||
pub chunk_min_chars: usize,
|
||
pub chunk_max_chars: usize,
|
||
pub chunk_vector_take: Option<usize>,
|
||
pub chunk_fts_take: Option<usize>,
|
||
pub chunk_token_budget: Option<usize>,
|
||
pub chunk_avg_chars_per_token: Option<usize>,
|
||
pub max_chunks_per_entity: Option<usize>,
|
||
pub rerank: bool,
|
||
pub rerank_pool_size: usize,
|
||
pub rerank_keep_top: usize,
|
||
pub concurrency: usize,
|
||
pub embedding_backend: EmbeddingBackend,
|
||
pub embedding_model: Option<String>,
|
||
pub cache_dir: PathBuf,
|
||
pub ingestion_cache_dir: PathBuf,
|
||
pub refresh_embeddings_only: bool,
|
||
pub detailed_report: bool,
|
||
pub slice: Option<String>,
|
||
pub reseed_slice: bool,
|
||
pub slice_seed: u64,
|
||
pub slice_grow: Option<usize>,
|
||
pub slice_offset: usize,
|
||
pub slice_reset_ingestion: bool,
|
||
pub negative_multiplier: f32,
|
||
pub label: Option<String>,
|
||
pub chunk_diagnostics_path: Option<PathBuf>,
|
||
pub inspect_question: Option<String>,
|
||
pub inspect_manifest: Option<PathBuf>,
|
||
pub query_model: Option<String>,
|
||
pub perf_log_json: Option<PathBuf>,
|
||
pub perf_log_dir: Option<PathBuf>,
|
||
pub perf_log_console: bool,
|
||
pub db_endpoint: String,
|
||
pub db_username: String,
|
||
pub db_password: String,
|
||
pub db_namespace: Option<String>,
|
||
pub db_database: Option<String>,
|
||
pub inspect_db_state: Option<PathBuf>,
|
||
}
|
||
|
||
impl Default for Config {
|
||
fn default() -> Self {
|
||
let dataset = DatasetKind::default();
|
||
Self {
|
||
convert_only: false,
|
||
force_convert: false,
|
||
dataset,
|
||
llm_mode: false,
|
||
corpus_limit: None,
|
||
raw_dataset_path: dataset.default_raw_path(),
|
||
converted_dataset_path: dataset.default_converted_path(),
|
||
report_dir: PathBuf::from("eval/reports"),
|
||
k: 5,
|
||
limit: Some(200),
|
||
summary_sample: 5,
|
||
full_context: false,
|
||
chunk_min_chars: 500,
|
||
chunk_max_chars: 2_000,
|
||
chunk_vector_take: None,
|
||
chunk_fts_take: None,
|
||
chunk_token_budget: None,
|
||
chunk_avg_chars_per_token: None,
|
||
max_chunks_per_entity: None,
|
||
rerank: true,
|
||
rerank_pool_size: 16,
|
||
rerank_keep_top: 10,
|
||
concurrency: 4,
|
||
embedding_backend: EmbeddingBackend::FastEmbed,
|
||
embedding_model: None,
|
||
cache_dir: PathBuf::from("eval/cache"),
|
||
ingestion_cache_dir: PathBuf::from("eval/cache/ingested"),
|
||
refresh_embeddings_only: false,
|
||
detailed_report: false,
|
||
slice: None,
|
||
reseed_slice: false,
|
||
slice_seed: DEFAULT_SLICE_SEED,
|
||
slice_grow: None,
|
||
slice_offset: 0,
|
||
slice_reset_ingestion: false,
|
||
negative_multiplier: crate::slices::DEFAULT_NEGATIVE_MULTIPLIER,
|
||
label: None,
|
||
chunk_diagnostics_path: None,
|
||
inspect_question: None,
|
||
inspect_manifest: None,
|
||
query_model: None,
|
||
inspect_db_state: None,
|
||
perf_log_json: None,
|
||
perf_log_dir: None,
|
||
perf_log_console: false,
|
||
db_endpoint: "ws://127.0.0.1:8000".to_string(),
|
||
db_username: "root_user".to_string(),
|
||
db_password: "root_password".to_string(),
|
||
db_namespace: None,
|
||
db_database: None,
|
||
}
|
||
}
|
||
}
|
||
|
||
impl Config {
|
||
pub fn context_token_limit(&self) -> Option<usize> {
|
||
None
|
||
}
|
||
}
|
||
|
||
#[derive(Debug)]
|
||
pub struct ParsedArgs {
|
||
pub config: Config,
|
||
pub show_help: bool,
|
||
}
|
||
|
||
pub fn parse() -> Result<ParsedArgs> {
|
||
let mut config = Config::default();
|
||
let mut show_help = false;
|
||
let mut raw_overridden = false;
|
||
let mut converted_overridden = false;
|
||
|
||
let mut args = env::args().skip(1).peekable();
|
||
while let Some(arg) = args.next() {
|
||
match arg.as_str() {
|
||
"-h" | "--help" => {
|
||
show_help = true;
|
||
break;
|
||
}
|
||
"--convert-only" => config.convert_only = true,
|
||
"--force" | "--refresh" => config.force_convert = true,
|
||
"--llm-mode" => {
|
||
config.llm_mode = true;
|
||
}
|
||
"--dataset" => {
|
||
let value = take_value("--dataset", &mut args)?;
|
||
let parsed = value.parse::<DatasetKind>()?;
|
||
config.dataset = parsed;
|
||
if !raw_overridden {
|
||
config.raw_dataset_path = parsed.default_raw_path();
|
||
}
|
||
if !converted_overridden {
|
||
config.converted_dataset_path = parsed.default_converted_path();
|
||
}
|
||
}
|
||
"--slice" => {
|
||
let value = take_value("--slice", &mut args)?;
|
||
config.slice = Some(value);
|
||
}
|
||
"--label" => {
|
||
let value = take_value("--label", &mut args)?;
|
||
config.label = Some(value);
|
||
}
|
||
"--query-model" => {
|
||
let value = take_value("--query-model", &mut args)?;
|
||
if value.trim().is_empty() {
|
||
return Err(anyhow!("--query-model requires a non-empty model name"));
|
||
}
|
||
config.query_model = Some(value.trim().to_string());
|
||
}
|
||
"--slice-grow" => {
|
||
let value = take_value("--slice-grow", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --slice-grow value '{value}' as usize")
|
||
})?;
|
||
if parsed == 0 {
|
||
return Err(anyhow!("--slice-grow must be greater than zero"));
|
||
}
|
||
config.slice_grow = Some(parsed);
|
||
}
|
||
"--slice-offset" => {
|
||
let value = take_value("--slice-offset", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --slice-offset value '{value}' as usize")
|
||
})?;
|
||
config.slice_offset = parsed;
|
||
}
|
||
"--raw" => {
|
||
let value = take_value("--raw", &mut args)?;
|
||
config.raw_dataset_path = PathBuf::from(value);
|
||
raw_overridden = true;
|
||
}
|
||
"--converted" => {
|
||
let value = take_value("--converted", &mut args)?;
|
||
config.converted_dataset_path = PathBuf::from(value);
|
||
converted_overridden = true;
|
||
}
|
||
"--corpus-limit" => {
|
||
let value = take_value("--corpus-limit", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --corpus-limit value '{value}' as usize")
|
||
})?;
|
||
config.corpus_limit = if parsed == 0 { None } else { Some(parsed) };
|
||
}
|
||
"--reseed-slice" => {
|
||
config.reseed_slice = true;
|
||
}
|
||
"--slice-reset-ingestion" => {
|
||
config.slice_reset_ingestion = true;
|
||
}
|
||
"--report-dir" => {
|
||
let value = take_value("--report-dir", &mut args)?;
|
||
config.report_dir = PathBuf::from(value);
|
||
}
|
||
"--k" => {
|
||
let value = take_value("--k", &mut args)?;
|
||
let parsed = value
|
||
.parse::<usize>()
|
||
.with_context(|| format!("failed to parse --k value '{value}' as usize"))?;
|
||
if parsed == 0 {
|
||
return Err(anyhow!("--k must be greater than zero"));
|
||
}
|
||
config.k = parsed;
|
||
}
|
||
"--limit" => {
|
||
let value = take_value("--limit", &mut args)?;
|
||
let parsed = value
|
||
.parse::<usize>()
|
||
.with_context(|| format!("failed to parse --limit value '{value}' as usize"))?;
|
||
config.limit = if parsed == 0 { None } else { Some(parsed) };
|
||
}
|
||
"--sample" => {
|
||
let value = take_value("--sample", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --sample value '{value}' as usize")
|
||
})?;
|
||
config.summary_sample = parsed.max(1);
|
||
}
|
||
"--full-context" => {
|
||
config.full_context = true;
|
||
}
|
||
"--chunk-min" => {
|
||
let value = take_value("--chunk-min", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --chunk-min value '{value}' as usize")
|
||
})?;
|
||
config.chunk_min_chars = parsed.max(1);
|
||
}
|
||
"--chunk-max" => {
|
||
let value = take_value("--chunk-max", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --chunk-max value '{value}' as usize")
|
||
})?;
|
||
config.chunk_max_chars = parsed.max(1);
|
||
}
|
||
"--chunk-vector-take" => {
|
||
let value = take_value("--chunk-vector-take", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --chunk-vector-take value '{value}' as usize")
|
||
})?;
|
||
if parsed == 0 {
|
||
return Err(anyhow!("--chunk-vector-take must be greater than zero"));
|
||
}
|
||
config.chunk_vector_take = Some(parsed);
|
||
}
|
||
"--chunk-fts-take" => {
|
||
let value = take_value("--chunk-fts-take", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --chunk-fts-take value '{value}' as usize")
|
||
})?;
|
||
if parsed == 0 {
|
||
return Err(anyhow!("--chunk-fts-take must be greater than zero"));
|
||
}
|
||
config.chunk_fts_take = Some(parsed);
|
||
}
|
||
"--chunk-token-budget" => {
|
||
let value = take_value("--chunk-token-budget", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --chunk-token-budget value '{value}' as usize")
|
||
})?;
|
||
if parsed == 0 {
|
||
return Err(anyhow!("--chunk-token-budget must be greater than zero"));
|
||
}
|
||
config.chunk_token_budget = Some(parsed);
|
||
}
|
||
"--chunk-token-chars" => {
|
||
let value = take_value("--chunk-token-chars", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --chunk-token-chars value '{value}' as usize")
|
||
})?;
|
||
if parsed == 0 {
|
||
return Err(anyhow!("--chunk-token-chars must be greater than zero"));
|
||
}
|
||
config.chunk_avg_chars_per_token = Some(parsed);
|
||
}
|
||
"--max-chunks-per-entity" => {
|
||
let value = take_value("--max-chunks-per-entity", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --max-chunks-per-entity value '{value}' as usize")
|
||
})?;
|
||
if parsed == 0 {
|
||
return Err(anyhow!("--max-chunks-per-entity must be greater than zero"));
|
||
}
|
||
config.max_chunks_per_entity = Some(parsed);
|
||
}
|
||
"--embedding" => {
|
||
let value = take_value("--embedding", &mut args)?;
|
||
config.embedding_backend = value.parse()?;
|
||
}
|
||
"--embedding-model" => {
|
||
let value = take_value("--embedding-model", &mut args)?;
|
||
config.embedding_model = Some(value.trim().to_string());
|
||
}
|
||
"--cache-dir" => {
|
||
let value = take_value("--cache-dir", &mut args)?;
|
||
config.cache_dir = PathBuf::from(value);
|
||
}
|
||
"--ingestion-cache-dir" => {
|
||
let value = take_value("--ingestion-cache-dir", &mut args)?;
|
||
config.ingestion_cache_dir = PathBuf::from(value);
|
||
}
|
||
"--negative-multiplier" => {
|
||
let value = take_value("--negative-multiplier", &mut args)?;
|
||
let parsed = value.parse::<f32>().with_context(|| {
|
||
format!("failed to parse --negative-multiplier value '{value}' as f32")
|
||
})?;
|
||
if !(parsed.is_finite() && parsed > 0.0) {
|
||
return Err(anyhow!(
|
||
"--negative-multiplier must be a positive finite number"
|
||
));
|
||
}
|
||
config.negative_multiplier = parsed;
|
||
}
|
||
"--no-rerank" => {
|
||
config.rerank = false;
|
||
}
|
||
"--rerank-pool" => {
|
||
let value = take_value("--rerank-pool", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --rerank-pool value '{value}' as usize")
|
||
})?;
|
||
config.rerank_pool_size = parsed.max(1);
|
||
}
|
||
"--rerank-keep" => {
|
||
let value = take_value("--rerank-keep", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --rerank-keep value '{value}' as usize")
|
||
})?;
|
||
config.rerank_keep_top = parsed.max(1);
|
||
}
|
||
"--concurrency" => {
|
||
let value = take_value("--concurrency", &mut args)?;
|
||
let parsed = value.parse::<usize>().with_context(|| {
|
||
format!("failed to parse --concurrency value '{value}' as usize")
|
||
})?;
|
||
config.concurrency = parsed.max(1);
|
||
}
|
||
"--refresh-embeddings" => {
|
||
config.refresh_embeddings_only = true;
|
||
}
|
||
"--detailed-report" => {
|
||
config.detailed_report = true;
|
||
}
|
||
"--chunk-diagnostics" => {
|
||
let value = take_value("--chunk-diagnostics", &mut args)?;
|
||
config.chunk_diagnostics_path = Some(PathBuf::from(value));
|
||
}
|
||
"--inspect-question" => {
|
||
let value = take_value("--inspect-question", &mut args)?;
|
||
config.inspect_question = Some(value);
|
||
}
|
||
"--inspect-manifest" => {
|
||
let value = take_value("--inspect-manifest", &mut args)?;
|
||
config.inspect_manifest = Some(PathBuf::from(value));
|
||
}
|
||
"--inspect-db-state" => {
|
||
let value = take_value("--inspect-db-state", &mut args)?;
|
||
config.inspect_db_state = Some(PathBuf::from(value));
|
||
}
|
||
"--perf-log-json" => {
|
||
let value = take_value("--perf-log-json", &mut args)?;
|
||
config.perf_log_json = Some(PathBuf::from(value));
|
||
}
|
||
"--perf-log-dir" => {
|
||
let value = take_value("--perf-log-dir", &mut args)?;
|
||
config.perf_log_dir = Some(PathBuf::from(value));
|
||
}
|
||
"--perf-log" => {
|
||
config.perf_log_console = true;
|
||
}
|
||
"--db-endpoint" => {
|
||
let value = take_value("--db-endpoint", &mut args)?;
|
||
config.db_endpoint = value;
|
||
}
|
||
"--db-user" => {
|
||
let value = take_value("--db-user", &mut args)?;
|
||
config.db_username = value;
|
||
}
|
||
"--db-pass" => {
|
||
let value = take_value("--db-pass", &mut args)?;
|
||
config.db_password = value;
|
||
}
|
||
"--db-namespace" => {
|
||
let value = take_value("--db-namespace", &mut args)?;
|
||
config.db_namespace = Some(value);
|
||
}
|
||
"--db-database" => {
|
||
let value = take_value("--db-database", &mut args)?;
|
||
config.db_database = Some(value);
|
||
}
|
||
unknown => {
|
||
return Err(anyhow!(
|
||
"unknown argument '{unknown}'. Use --help to see available options."
|
||
));
|
||
}
|
||
}
|
||
}
|
||
|
||
if config.chunk_min_chars >= config.chunk_max_chars {
|
||
return Err(anyhow!(
|
||
"--chunk-min must be less than --chunk-max (got {} >= {})",
|
||
config.chunk_min_chars,
|
||
config.chunk_max_chars
|
||
));
|
||
}
|
||
|
||
if config.rerank && config.rerank_pool_size == 0 {
|
||
return Err(anyhow!(
|
||
"--rerank-pool must be greater than zero when reranking is enabled"
|
||
));
|
||
}
|
||
|
||
if config.concurrency == 0 {
|
||
return Err(anyhow!("--concurrency must be greater than zero"));
|
||
}
|
||
|
||
if config.embedding_backend == EmbeddingBackend::Hashed && config.embedding_model.is_some() {
|
||
return Err(anyhow!(
|
||
"--embedding-model cannot be used with the 'hashed' embedding backend"
|
||
));
|
||
}
|
||
|
||
if let Some(limit) = config.limit {
|
||
if let Some(corpus_limit) = config.corpus_limit {
|
||
if corpus_limit < limit {
|
||
config.corpus_limit = Some(limit);
|
||
}
|
||
} else {
|
||
let default_multiplier = 10usize;
|
||
let mut computed = limit.saturating_mul(default_multiplier);
|
||
if computed < limit {
|
||
computed = limit;
|
||
}
|
||
let max_cap = 1_000usize;
|
||
if computed > max_cap {
|
||
computed = max_cap;
|
||
}
|
||
config.corpus_limit = Some(computed);
|
||
}
|
||
}
|
||
|
||
if config.perf_log_dir.is_none() {
|
||
if let Ok(dir) = env::var("EVAL_PERF_LOG_DIR") {
|
||
if !dir.trim().is_empty() {
|
||
config.perf_log_dir = Some(PathBuf::from(dir));
|
||
}
|
||
}
|
||
}
|
||
|
||
if let Ok(endpoint) = env::var("EVAL_DB_ENDPOINT") {
|
||
if !endpoint.trim().is_empty() {
|
||
config.db_endpoint = endpoint;
|
||
}
|
||
}
|
||
if let Ok(username) = env::var("EVAL_DB_USERNAME") {
|
||
if !username.trim().is_empty() {
|
||
config.db_username = username;
|
||
}
|
||
}
|
||
if let Ok(password) = env::var("EVAL_DB_PASSWORD") {
|
||
if !password.trim().is_empty() {
|
||
config.db_password = password;
|
||
}
|
||
}
|
||
if let Ok(ns) = env::var("EVAL_DB_NAMESPACE") {
|
||
if !ns.trim().is_empty() {
|
||
config.db_namespace = Some(ns);
|
||
}
|
||
}
|
||
if let Ok(db) = env::var("EVAL_DB_DATABASE") {
|
||
if !db.trim().is_empty() {
|
||
config.db_database = Some(db);
|
||
}
|
||
}
|
||
Ok(ParsedArgs { config, show_help })
|
||
}
|
||
|
||
fn take_value<'a, I>(flag: &str, iter: &mut std::iter::Peekable<I>) -> Result<String>
|
||
where
|
||
I: Iterator<Item = String>,
|
||
{
|
||
iter.next().ok_or_else(|| anyhow!("{flag} expects a value"))
|
||
}
|
||
|
||
/// Prints the CLI usage text to stdout.
///
/// The text lists every flag accepted by `parse`, including `--concurrency`
/// and `-h, --help` in the OPTIONS section. The literal is used as a format
/// string, so it must not contain `{` or `}`.
pub fn print_help() {
    println!(
        "\
eval — dataset conversion, ingestion, and retrieval evaluation CLI

USAGE:
    cargo eval -- [options]
    # or
    cargo run -p eval -- [options]

OPTIONS:
    --convert-only          Convert the selected dataset and exit.
    --force, --refresh      Regenerate the converted dataset even if it already exists.
    --dataset <name>        Dataset to evaluate: 'squad' (default) or 'natural-questions'.
    --llm-mode              Enable LLM-assisted evaluation features (includes unanswerable cases).
    --slice <id|path>       Use a cached dataset slice by id (under eval/cache/slices) or by explicit path.
    --label <text>          Annotate the run; label is stored in JSON/Markdown reports.
    --query-model <name>    Override the SurrealDB system settings query model (e.g., gpt-4o-mini) for this run.
    --slice-grow <int>      Grow the slice ledger to contain at least this many answerable cases, then exit.
    --slice-offset <int>    Evaluate questions starting at this offset within the slice (default: 0).
    --reseed-slice          Ignore cached corpus state and rebuild the slice's SurrealDB corpus.
    --slice-reset-ingestion
                            Delete cached paragraph shards before rebuilding the ingestion corpus.
    --corpus-limit <int>    Cap the slice corpus size (positives + negatives). Defaults to ~10× --limit, capped at 1000.
    --raw <path>            Path to the raw dataset (defaults per dataset).
    --converted <path>      Path to write/read the converted dataset (defaults per dataset).
    --report-dir <path>     Directory to write evaluation reports (default: eval/reports).
    --k <int>               Precision@k cutoff (default: 5).
    --limit <int>           Limit the number of questions evaluated (default: 200, 0 = all).
    --sample <int>          Number of mismatches to surface in the Markdown summary (default: 5).
    --full-context          Disable context cropping when converting datasets (ingest entire documents).
    --chunk-min <int>       Minimum characters per chunk for text splitting (default: 500).
    --chunk-max <int>       Maximum characters per chunk for text splitting (default: 2000).
    --chunk-vector-take <int>
                            Override chunk vector candidate cap (default: 20).
    --chunk-fts-take <int>
                            Override chunk FTS candidate cap (default: 20).
    --chunk-token-budget <int>
                            Override chunk token budget estimate for assembly (default: 10000).
    --chunk-token-chars <int>
                            Override average characters per token used for budgeting (default: 4).
    --max-chunks-per-entity <int>
                            Override maximum chunks attached per entity (default: 4).
    --embedding <name>      Embedding backend: 'fastembed' (default) or 'hashed'.
    --embedding-model <code>
                            FastEmbed model code (defaults to crate preset when omitted).
    --cache-dir <path>      Directory for embedding caches (default: eval/cache).
    --ingestion-cache-dir <path>
                            Directory for ingestion corpora caches (default: eval/cache/ingested).
    --negative-multiplier <float>
                            Target negative-to-positive paragraph ratio for slice growth (default: 4.0).
    --refresh-embeddings    Recompute embeddings for cached corpora without re-running ingestion.
    --detailed-report       Include entity descriptions and categories in JSON reports.
    --chunk-diagnostics <path>
                            Write per-query chunk diagnostics JSONL to the provided path.
    --no-rerank             Disable the FastEmbed reranking stage (enabled by default).
    --rerank-pool <int>     Reranking engine pool size / parallelism (default: 16).
    --rerank-keep <int>     Keep top-N entities after reranking (default: 10).
    --concurrency <int>     Concurrency level for the run (default: 4, minimum: 1).
    --inspect-question <id>
                            Inspect an ingestion cache question and exit (requires --inspect-manifest).
    --inspect-manifest <path>
                            Path to an ingestion cache manifest JSON for inspection mode.
    --inspect-db-state <path>
                            Optional override for the SurrealDB state.json used during inspection; defaults to the state recorded for the selected dataset slice.
    --db-endpoint <url>     SurrealDB server endpoint (use http:// or https:// to enable SurQL export/import; ws:// endpoints reuse existing namespaces but skip SurQL exports; default: ws://127.0.0.1:8000).
    --db-user <value>       SurrealDB root username (default: root_user).
    --db-pass <value>       SurrealDB root password (default: root_password).
    --db-namespace <ns>     Override the namespace used on the SurrealDB server; state.json tracks this value and the ledger case count so changing it or requesting more cases via --limit triggers a rebuild/import (default: derived from dataset).
    --db-database <db>      Override the database used on the SurrealDB server; recorded alongside namespace in state.json (default: derived from slice).
    --perf-log              Print per-stage performance timings to stdout after the run.
    --perf-log-json <path>
                            Write structured performance telemetry JSON to the provided path.
    --perf-log-dir <path>
                            Directory that receives timestamped perf JSON copies (defaults to $EVAL_PERF_LOG_DIR).
    -h, --help              Show this help text.

Examples:
    cargo eval -- --dataset squad --limit 10 --detailed-report
    cargo eval -- --dataset natural-questions --limit 1 --rerank-pool 1 --detailed-report

Notes:
    The latest run's JSON/Markdown reports are saved as eval/reports/latest.json and latest.md, making it easy to script automated checks.

Dataset defaults (from eval/manifest.yaml):
    squad               raw:       eval/data/raw/squad/dev-v2.0.json
                        converted: eval/data/converted/squad-minne.json
    natural-questions   raw:       eval/data/raw/nq/dev-all.jsonl
                        converted: eval/data/converted/nq-dev-minne.json
"
    );
}
|
||
|
||
pub fn ensure_parent(path: &Path) -> Result<()> {
|
||
if let Some(parent) = path.parent() {
|
||
std::fs::create_dir_all(parent)
|
||
.with_context(|| format!("creating parent directory for {}", path.display()))?;
|
||
}
|
||
Ok(())
|
||
}
|