fix: name harmonization of endpoints & ingestion security hardening

This commit is contained in:
Per Stark
2026-02-13 22:36:00 +01:00
parent f22cac891c
commit e07199adfc
15 changed files with 258 additions and 53 deletions

View File

@@ -86,6 +86,16 @@ pub struct AppConfig {
pub retrieval_strategy: Option<String>,
#[serde(default)]
pub embedding_backend: EmbeddingBackend,
#[serde(default = "default_ingest_max_body_bytes")]
pub ingest_max_body_bytes: usize,
#[serde(default = "default_ingest_max_files")]
pub ingest_max_files: usize,
#[serde(default = "default_ingest_max_content_bytes")]
pub ingest_max_content_bytes: usize,
#[serde(default = "default_ingest_max_context_bytes")]
pub ingest_max_context_bytes: usize,
#[serde(default = "default_ingest_max_category_bytes")]
pub ingest_max_category_bytes: usize,
}
/// Default data directory for persisted assets.
@@ -103,6 +113,26 @@ fn default_reranking_enabled() -> bool {
false
}
fn default_ingest_max_body_bytes() -> usize {
20_000_000
}
fn default_ingest_max_files() -> usize {
5
}
fn default_ingest_max_content_bytes() -> usize {
262_144
}
fn default_ingest_max_context_bytes() -> usize {
16_384
}
fn default_ingest_max_category_bytes() -> usize {
128
}
pub fn ensure_ort_path() {
if env::var_os("ORT_DYLIB_PATH").is_some() {
return;
@@ -157,6 +187,11 @@ impl Default for AppConfig {
fastembed_max_length: None,
retrieval_strategy: None,
embedding_backend: EmbeddingBackend::default(),
ingest_max_body_bytes: default_ingest_max_body_bytes(),
ingest_max_files: default_ingest_max_files(),
ingest_max_content_bytes: default_ingest_max_content_bytes(),
ingest_max_context_bytes: default_ingest_max_context_bytes(),
ingest_max_category_bytes: default_ingest_max_category_bytes(),
}
}
}

View File

@@ -0,0 +1,113 @@
use super::config::AppConfig;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IngestValidationError {
PayloadTooLarge(String),
BadRequest(String),
}
pub fn validate_ingest_input(
config: &AppConfig,
content: Option<&str>,
context: &str,
category: &str,
file_count: usize,
) -> Result<(), IngestValidationError> {
if file_count > config.ingest_max_files {
return Err(IngestValidationError::BadRequest(format!(
"Too many files. Maximum allowed is {}",
config.ingest_max_files
)));
}
if let Some(content) = content {
if content.len() > config.ingest_max_content_bytes {
return Err(IngestValidationError::PayloadTooLarge(format!(
"Content is too large. Maximum allowed is {} bytes",
config.ingest_max_content_bytes
)));
}
}
if context.len() > config.ingest_max_context_bytes {
return Err(IngestValidationError::PayloadTooLarge(format!(
"Context is too large. Maximum allowed is {} bytes",
config.ingest_max_context_bytes
)));
}
if category.len() > config.ingest_max_category_bytes {
return Err(IngestValidationError::PayloadTooLarge(format!(
"Category is too large. Maximum allowed is {} bytes",
config.ingest_max_category_bytes
)));
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn validate_ingest_input_rejects_too_many_files() {
let config = AppConfig {
ingest_max_files: 1,
..Default::default()
};
let result = validate_ingest_input(&config, Some("ok"), "ctx", "cat", 2);
assert!(matches!(result, Err(IngestValidationError::BadRequest(_))));
}
#[test]
fn validate_ingest_input_rejects_oversized_content() {
let config = AppConfig {
ingest_max_content_bytes: 4,
..Default::default()
};
let result = validate_ingest_input(&config, Some("12345"), "ctx", "cat", 0);
assert!(matches!(
result,
Err(IngestValidationError::PayloadTooLarge(_))
));
}
#[test]
fn validate_ingest_input_rejects_oversized_context() {
let config = AppConfig {
ingest_max_context_bytes: 2,
..Default::default()
};
let result = validate_ingest_input(&config, None, "long", "cat", 0);
assert!(matches!(
result,
Err(IngestValidationError::PayloadTooLarge(_))
));
}
#[test]
fn validate_ingest_input_rejects_oversized_category() {
let config = AppConfig {
ingest_max_category_bytes: 2,
..Default::default()
};
let result = validate_ingest_input(&config, None, "ok", "long", 0);
assert!(matches!(
result,
Err(IngestValidationError::PayloadTooLarge(_))
));
}
#[test]
fn validate_ingest_input_accepts_valid_payload() {
let config = AppConfig::default();
let result = validate_ingest_input(&config, Some("ok"), "ctx", "cat", 1);
assert!(result.is_ok());
}
}

View File

@@ -1,3 +1,4 @@
pub mod config;
pub mod embedding;
pub mod ingest_limits;
pub mod template_engine;