mirror of
https://github.com/perstarkse/minne.git
synced 2026-03-28 20:31:53 +01:00
fix: name harmonization of endpoints & ingestion security hardening
This commit is contained in:
@@ -86,6 +86,16 @@ pub struct AppConfig {
|
||||
pub retrieval_strategy: Option<String>,
|
||||
#[serde(default)]
|
||||
pub embedding_backend: EmbeddingBackend,
|
||||
#[serde(default = "default_ingest_max_body_bytes")]
|
||||
pub ingest_max_body_bytes: usize,
|
||||
#[serde(default = "default_ingest_max_files")]
|
||||
pub ingest_max_files: usize,
|
||||
#[serde(default = "default_ingest_max_content_bytes")]
|
||||
pub ingest_max_content_bytes: usize,
|
||||
#[serde(default = "default_ingest_max_context_bytes")]
|
||||
pub ingest_max_context_bytes: usize,
|
||||
#[serde(default = "default_ingest_max_category_bytes")]
|
||||
pub ingest_max_category_bytes: usize,
|
||||
}
|
||||
|
||||
/// Default data directory for persisted assets.
|
||||
@@ -103,6 +113,26 @@ fn default_reranking_enabled() -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Fallback value for `AppConfig::ingest_max_body_bytes`: 20,000,000 bytes.
///
/// Referenced by the field's `#[serde(default = "...")]` attribute and by
/// `Default for AppConfig`, so both paths agree on the same number.
fn default_ingest_max_body_bytes() -> usize {
    20_000_000
}
|
||||
|
||||
/// Fallback value for `AppConfig::ingest_max_files`: 5 files.
///
/// Referenced by the field's `#[serde(default = "...")]` attribute and by
/// `Default for AppConfig`, so both paths agree on the same number.
fn default_ingest_max_files() -> usize {
    5
}
|
||||
|
||||
/// Fallback value for `AppConfig::ingest_max_content_bytes`: 262,144 bytes (256 KiB).
///
/// Referenced by the field's `#[serde(default = "...")]` attribute and by
/// `Default for AppConfig`, so both paths agree on the same number.
fn default_ingest_max_content_bytes() -> usize {
    262_144
}
|
||||
|
||||
/// Fallback value for `AppConfig::ingest_max_context_bytes`: 16,384 bytes (16 KiB).
///
/// Referenced by the field's `#[serde(default = "...")]` attribute and by
/// `Default for AppConfig`, so both paths agree on the same number.
fn default_ingest_max_context_bytes() -> usize {
    16_384
}
|
||||
|
||||
/// Fallback value for `AppConfig::ingest_max_category_bytes`: 128 bytes.
///
/// Referenced by the field's `#[serde(default = "...")]` attribute and by
/// `Default for AppConfig`, so both paths agree on the same number.
fn default_ingest_max_category_bytes() -> usize {
    128
}
|
||||
|
||||
pub fn ensure_ort_path() {
|
||||
if env::var_os("ORT_DYLIB_PATH").is_some() {
|
||||
return;
|
||||
@@ -157,6 +187,11 @@ impl Default for AppConfig {
|
||||
fastembed_max_length: None,
|
||||
retrieval_strategy: None,
|
||||
embedding_backend: EmbeddingBackend::default(),
|
||||
ingest_max_body_bytes: default_ingest_max_body_bytes(),
|
||||
ingest_max_files: default_ingest_max_files(),
|
||||
ingest_max_content_bytes: default_ingest_max_content_bytes(),
|
||||
ingest_max_context_bytes: default_ingest_max_context_bytes(),
|
||||
ingest_max_category_bytes: default_ingest_max_category_bytes(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
113
common/src/utils/ingest_limits.rs
Normal file
113
common/src/utils/ingest_limits.rs
Normal file
@@ -0,0 +1,113 @@
|
||||
use super::config::AppConfig;
|
||||
|
||||
/// Reasons an ingest request is rejected by `validate_ingest_input`.
///
/// Each variant carries the human-readable message describing the violated
/// limit; `Display` prints that message unchanged.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IngestValidationError {
    /// A text field (content, context or category) exceeded its byte limit.
    PayloadTooLarge(String),
    /// The request was rejected for a non-size reason; in this module that is
    /// only raised when too many files are attached.
    BadRequest(String),
}

impl std::fmt::Display for IngestValidationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Both variants wrap a ready-made message, so print it verbatim.
        match self {
            Self::PayloadTooLarge(message) | Self::BadRequest(message) => f.write_str(message),
        }
    }
}

impl std::error::Error for IngestValidationError {}
|
||||
|
||||
pub fn validate_ingest_input(
|
||||
config: &AppConfig,
|
||||
content: Option<&str>,
|
||||
context: &str,
|
||||
category: &str,
|
||||
file_count: usize,
|
||||
) -> Result<(), IngestValidationError> {
|
||||
if file_count > config.ingest_max_files {
|
||||
return Err(IngestValidationError::BadRequest(format!(
|
||||
"Too many files. Maximum allowed is {}",
|
||||
config.ingest_max_files
|
||||
)));
|
||||
}
|
||||
|
||||
if let Some(content) = content {
|
||||
if content.len() > config.ingest_max_content_bytes {
|
||||
return Err(IngestValidationError::PayloadTooLarge(format!(
|
||||
"Content is too large. Maximum allowed is {} bytes",
|
||||
config.ingest_max_content_bytes
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
if context.len() > config.ingest_max_context_bytes {
|
||||
return Err(IngestValidationError::PayloadTooLarge(format!(
|
||||
"Context is too large. Maximum allowed is {} bytes",
|
||||
config.ingest_max_context_bytes
|
||||
)));
|
||||
}
|
||||
|
||||
if category.len() > config.ingest_max_category_bytes {
|
||||
return Err(IngestValidationError::PayloadTooLarge(format!(
|
||||
"Category is too large. Maximum allowed is {} bytes",
|
||||
config.ingest_max_category_bytes
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Unit tests for `validate_ingest_input`: one rejection test per limit,
    //! plus boundary cases showing that limits are inclusive and byte-based.

    use super::*;

    #[test]
    fn validate_ingest_input_rejects_too_many_files() {
        let config = AppConfig {
            ingest_max_files: 1,
            ..Default::default()
        };
        let result = validate_ingest_input(&config, Some("ok"), "ctx", "cat", 2);

        assert!(matches!(result, Err(IngestValidationError::BadRequest(_))));
    }

    #[test]
    fn validate_ingest_input_rejects_oversized_content() {
        let config = AppConfig {
            ingest_max_content_bytes: 4,
            ..Default::default()
        };
        let result = validate_ingest_input(&config, Some("12345"), "ctx", "cat", 0);

        assert!(matches!(
            result,
            Err(IngestValidationError::PayloadTooLarge(_))
        ));
    }

    #[test]
    fn validate_ingest_input_rejects_oversized_context() {
        let config = AppConfig {
            ingest_max_context_bytes: 2,
            ..Default::default()
        };
        let result = validate_ingest_input(&config, None, "long", "cat", 0);

        assert!(matches!(
            result,
            Err(IngestValidationError::PayloadTooLarge(_))
        ));
    }

    #[test]
    fn validate_ingest_input_rejects_oversized_category() {
        let config = AppConfig {
            ingest_max_category_bytes: 2,
            ..Default::default()
        };
        let result = validate_ingest_input(&config, None, "ok", "long", 0);

        assert!(matches!(
            result,
            Err(IngestValidationError::PayloadTooLarge(_))
        ));
    }

    #[test]
    fn validate_ingest_input_accepts_valid_payload() {
        let config = AppConfig::default();
        let result = validate_ingest_input(&config, Some("ok"), "ctx", "cat", 1);

        assert!(result.is_ok());
    }

    #[test]
    fn validate_ingest_input_accepts_values_exactly_at_limits() {
        // Limits are inclusive: only values strictly above them are rejected.
        let config = AppConfig {
            ingest_max_files: 1,
            ingest_max_content_bytes: 4,
            ingest_max_context_bytes: 3,
            ingest_max_category_bytes: 3,
            ..Default::default()
        };
        let result = validate_ingest_input(&config, Some("1234"), "ctx", "cat", 1);

        assert_eq!(result, Ok(()));
    }

    #[test]
    fn validate_ingest_input_counts_bytes_not_characters() {
        // "é" is one character but two UTF-8 bytes, so a 1-byte limit rejects it.
        let config = AppConfig {
            ingest_max_content_bytes: 1,
            ..Default::default()
        };
        let result = validate_ingest_input(&config, Some("é"), "ctx", "cat", 0);

        assert!(matches!(
            result,
            Err(IngestValidationError::PayloadTooLarge(_))
        ));
    }

    #[test]
    fn validate_ingest_input_skips_content_check_when_absent() {
        // With no content supplied, even a zero-byte content limit passes.
        let config = AppConfig {
            ingest_max_content_bytes: 0,
            ..Default::default()
        };
        let result = validate_ingest_input(&config, None, "ctx", "cat", 0);

        assert_eq!(result, Ok(()));
    }
}
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod config;
|
||||
pub mod embedding;
|
||||
pub mod ingest_limits;
|
||||
pub mod template_engine;
|
||||
|
||||
Reference in New Issue
Block a user