Files
minne/common/src/utils/config.rs
T

211 lines
6.2 KiB
Rust

use config::{Config, ConfigError, Environment, File};
use serde::Deserialize;
use std::env;
/// Selects the embedding backend for vector generation.
#[derive(Clone, Deserialize, Debug, Default, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum EmbeddingBackend {
/// Use OpenAI-compatible API for embeddings.
OpenAI,
/// Use FastEmbed local embeddings (default).
#[default]
FastEmbed,
/// Use deterministic hashed embeddings (for testing).
Hashed,
}
#[derive(Clone, Deserialize, Debug, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum StorageKind {
Local,
Memory,
S3,
}
/// Default storage backend when none is configured.
fn default_storage_kind() -> StorageKind {
StorageKind::Local
}
fn default_s3_region() -> String {
"us-east-1".to_string()
}
/// Selects the strategy used for PDF ingestion.
#[derive(Clone, Deserialize, Debug)]
#[serde(rename_all = "kebab-case")]
pub enum PdfIngestMode {
/// Only rely on classic text extraction (no LLM fallbacks).
Classic,
/// Prefer fast text extraction, but fall back to the LLM rendering path when needed.
LlmFirst,
}
/// Default PDF ingestion mode when unset.
fn default_pdf_ingest_mode() -> PdfIngestMode {
PdfIngestMode::LlmFirst
}
/// Application configuration loaded from files and environment variables.
#[allow(clippy::module_name_repetitions)]
#[derive(Clone, Deserialize, Debug)]
pub struct AppConfig {
pub openai_api_key: String,
pub surrealdb_address: String,
pub surrealdb_username: String,
pub surrealdb_password: String,
pub surrealdb_namespace: String,
pub surrealdb_database: String,
#[serde(default = "default_data_dir")]
pub data_dir: String,
pub http_port: u16,
#[serde(default = "default_base_url")]
pub openai_base_url: String,
#[serde(default = "default_storage_kind")]
pub storage: StorageKind,
#[serde(default)]
pub s3_bucket: Option<String>,
#[serde(default)]
pub s3_endpoint: Option<String>,
#[serde(default = "default_s3_region")]
pub s3_region: String,
#[serde(default = "default_pdf_ingest_mode")]
pub pdf_ingest_mode: PdfIngestMode,
#[serde(default = "default_reranking_enabled")]
pub reranking_enabled: bool,
#[serde(default)]
pub reranking_pool_size: Option<usize>,
#[serde(default)]
pub fastembed_cache_dir: Option<String>,
#[serde(default)]
pub fastembed_show_download_progress: Option<bool>,
#[serde(default)]
pub fastembed_max_length: Option<usize>,
#[serde(default)]
pub retrieval_strategy: Option<String>,
#[serde(default)]
pub embedding_backend: EmbeddingBackend,
#[serde(default = "default_ingest_max_body_bytes")]
pub ingest_max_body_bytes: usize,
#[serde(default = "default_ingest_max_files")]
pub ingest_max_files: usize,
#[serde(default = "default_ingest_max_content_bytes")]
pub ingest_max_content_bytes: usize,
#[serde(default = "default_ingest_max_context_bytes")]
pub ingest_max_context_bytes: usize,
#[serde(default = "default_ingest_max_category_bytes")]
pub ingest_max_category_bytes: usize,
}
/// Default data directory for persisted assets.
fn default_data_dir() -> String {
"./data".to_string()
}
/// Default base URL used for OpenAI-compatible APIs.
fn default_base_url() -> String {
"https://api.openai.com/v1".to_string()
}
/// Whether reranking is enabled by default.
fn default_reranking_enabled() -> bool {
false
}
fn default_ingest_max_body_bytes() -> usize {
20_000_000
}
fn default_ingest_max_files() -> usize {
5
}
fn default_ingest_max_content_bytes() -> usize {
262_144
}
fn default_ingest_max_context_bytes() -> usize {
16_384
}
fn default_ingest_max_category_bytes() -> usize {
128
}
pub fn ensure_ort_path() {
if env::var_os("ORT_DYLIB_PATH").is_some() {
return;
}
if let Ok(mut exe) = env::current_exe() {
exe.pop();
if cfg!(target_os = "windows") {
for p in [
exe.join("onnxruntime.dll"),
exe.join("lib").join("onnxruntime.dll"),
] {
if p.exists() {
env::set_var("ORT_DYLIB_PATH", p);
return;
}
}
}
let name = if cfg!(target_os = "macos") {
"libonnxruntime.dylib"
} else {
"libonnxruntime.so"
};
let p = exe.join("lib").join(name);
if p.exists() {
env::set_var("ORT_DYLIB_PATH", p);
}
}
}
impl Default for AppConfig {
fn default() -> Self {
Self {
openai_api_key: String::new(),
surrealdb_address: String::new(),
surrealdb_username: String::new(),
surrealdb_password: String::new(),
surrealdb_namespace: String::new(),
surrealdb_database: String::new(),
data_dir: default_data_dir(),
http_port: 0,
openai_base_url: default_base_url(),
storage: default_storage_kind(),
s3_bucket: None,
s3_endpoint: None,
s3_region: default_s3_region(),
pdf_ingest_mode: default_pdf_ingest_mode(),
reranking_enabled: default_reranking_enabled(),
reranking_pool_size: None,
fastembed_cache_dir: None,
fastembed_show_download_progress: None,
fastembed_max_length: None,
retrieval_strategy: None,
embedding_backend: EmbeddingBackend::default(),
ingest_max_body_bytes: default_ingest_max_body_bytes(),
ingest_max_files: default_ingest_max_files(),
ingest_max_content_bytes: default_ingest_max_content_bytes(),
ingest_max_context_bytes: default_ingest_max_context_bytes(),
ingest_max_category_bytes: default_ingest_max_category_bytes(),
}
}
}
/// Loads the application configuration from the environment and optional config file.
#[allow(clippy::module_name_repetitions)]
pub fn get_config() -> Result<AppConfig, ConfigError> {
ensure_ort_path();
let config = Config::builder()
.add_source(File::with_name("config").required(false))
.add_source(Environment::default())
.build()?;
config.try_deserialize()
}