Files
minne/evaluations/src/corpus/mod.rs
T
2026-06-25 16:33:48 +02:00

27 lines
991 B
Rust

mod config;
mod orchestrator;
pub(crate) mod store;
pub use config::CorpusCacheConfig;
pub use orchestrator::{
cached_corpus_dir, compute_ingestion_fingerprint, corpus_handle_from_manifest, ensure_corpus,
load_cached_manifest, persist_corpus_manifest,
};
pub use store::{
CorpusHandle, CorpusManifest, CorpusMetadata, CorpusQuestion, MANIFEST_VERSION,
NamespaceSeedRecord, ParagraphShard, ParagraphShardStore, seed_manifest_into_db,
window_manifest,
};
/// Translates the evaluation [`Config`](crate::args::Config) into the
/// configuration consumed by the ingestion pipeline.
///
/// The three chunk-size knobs (`ingest_chunk_min_tokens`,
/// `ingest_chunk_max_tokens`, `ingest_chunk_overlap_tokens`) are copied from
/// `config.ingest`; every other tuning field is left at its `Default` value.
/// `chunk_only` is the negation of `config.ingest.include_entities`.
pub fn make_ingestion_config(config: &crate::args::Config) -> ingestion_pipeline::IngestionConfig {
    let ingest = &config.ingest;

    // Only the chunking parameters are overridden; the rest of the tuning
    // comes from the pipeline's defaults.
    let tuning = ingestion_pipeline::IngestionTuning {
        chunk_min_tokens: ingest.ingest_chunk_min_tokens,
        chunk_max_tokens: ingest.ingest_chunk_max_tokens,
        chunk_overlap_tokens: ingest.ingest_chunk_overlap_tokens,
        ..Default::default()
    };

    ingestion_pipeline::IngestionConfig {
        tuning,
        chunk_only: !ingest.include_entities,
    }
}