mirror of
https://github.com/perstarkse/minne.git
synced 2026-02-21 23:57:41 +01:00
fix: name harmonization of endpoints & ingestion security hardening
This commit is contained in:
@@ -1,8 +1,7 @@
|
||||
# Changelog
|
||||
## Unreleased
|
||||
- Fix: edge case where navigation back to a chat page could trigger a new response generation
|
||||
- Security: hardened storage-layer queries by replacing user-influenced string interpolation with bound parameters and adding injection regression tests.
|
||||
- Security: removed raw ingestion payload logging from API/HTML ingress handlers and replaced it with metadata-only structured logs.
|
||||
- Security: Misc security fixes
|
||||
|
||||
## 1.0.1 (2026-02-11)
|
||||
- Shipped an S3 storage backend so content can be stored in object storage instead of local disk, with configuration support for S3 deployments.
|
||||
|
||||
@@ -20,6 +20,9 @@ pub enum ApiError {
|
||||
|
||||
#[error("Unauthorized: {0}")]
|
||||
Unauthorized(String),
|
||||
|
||||
#[error("Payload too large: {0}")]
|
||||
PayloadTooLarge(String),
|
||||
}
|
||||
|
||||
impl From<AppError> for ApiError {
|
||||
@@ -67,6 +70,13 @@ impl IntoResponse for ApiError {
|
||||
status: "error".to_string(),
|
||||
},
|
||||
),
|
||||
Self::PayloadTooLarge(message) => (
|
||||
StatusCode::PAYLOAD_TOO_LARGE,
|
||||
ErrorResponse {
|
||||
error: message,
|
||||
status: "error".to_string(),
|
||||
},
|
||||
),
|
||||
};
|
||||
|
||||
(status, Json(error_response)).into_response()
|
||||
@@ -132,6 +142,10 @@ mod tests {
|
||||
// Test unauthorized status
|
||||
let error = ApiError::Unauthorized("not allowed".to_string());
|
||||
assert_status_code(error, StatusCode::UNAUTHORIZED);
|
||||
|
||||
// Test payload too large status
|
||||
let error = ApiError::PayloadTooLarge("too big".to_string());
|
||||
assert_status_code(error, StatusCode::PAYLOAD_TOO_LARGE);
|
||||
}
|
||||
|
||||
// Alternative approach that doesn't try to parse the response body
|
||||
|
||||
@@ -6,7 +6,7 @@ use axum::{
|
||||
Router,
|
||||
};
|
||||
use middleware_api_auth::api_auth;
|
||||
use routes::{categories::get_categories, ingress::ingest_data, liveness::live, readiness::ready};
|
||||
use routes::{categories::get_categories, ingest::ingest_data, liveness::live, readiness::ready};
|
||||
|
||||
pub mod api_state;
|
||||
pub mod error;
|
||||
@@ -26,9 +26,13 @@ where
|
||||
|
||||
// Protected API endpoints (require auth)
|
||||
let protected = Router::new()
|
||||
.route("/ingress", post(ingest_data))
|
||||
.route(
|
||||
"/ingest",
|
||||
post(ingest_data).layer(DefaultBodyLimit::max(
|
||||
app_state.config.ingest_max_body_bytes,
|
||||
)),
|
||||
)
|
||||
.route("/categories", get(get_categories))
|
||||
.layer(DefaultBodyLimit::max(1024 * 1024 * 1024))
|
||||
.route_layer(from_fn_with_state(app_state.clone(), api_auth));
|
||||
|
||||
public.merge(protected)
|
||||
|
||||
@@ -6,6 +6,7 @@ use common::{
|
||||
file_info::FileInfo, ingestion_payload::IngestionPayload, ingestion_task::IngestionTask,
|
||||
user::User,
|
||||
},
|
||||
utils::ingest_limits::{validate_ingest_input, IngestValidationError},
|
||||
};
|
||||
use futures::{future::try_join_all, TryFutureExt};
|
||||
use serde_json::json;
|
||||
@@ -19,7 +20,7 @@ pub struct IngestParams {
|
||||
pub content: Option<String>,
|
||||
pub context: String,
|
||||
pub category: String,
|
||||
#[form_data(limit = "10000000")] // Adjust limit as needed
|
||||
#[form_data(limit = "20000000")]
|
||||
#[form_data(default)]
|
||||
pub files: Vec<FieldData<NamedTempFile>>,
|
||||
}
|
||||
@@ -36,6 +37,22 @@ pub async fn ingest_data(
|
||||
let category_bytes = input.category.len();
|
||||
let file_count = input.files.len();
|
||||
|
||||
match validate_ingest_input(
|
||||
&state.config,
|
||||
input.content.as_deref(),
|
||||
&input.context,
|
||||
&input.category,
|
||||
file_count,
|
||||
) {
|
||||
Ok(()) => {}
|
||||
Err(IngestValidationError::PayloadTooLarge(message)) => {
|
||||
return Err(ApiError::PayloadTooLarge(message));
|
||||
}
|
||||
Err(IngestValidationError::BadRequest(message)) => {
|
||||
return Err(ApiError::ValidationError(message));
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
user_id = %user_id,
|
||||
has_content,
|
||||
@@ -43,7 +60,7 @@ pub async fn ingest_data(
|
||||
context_bytes,
|
||||
category_bytes,
|
||||
file_count,
|
||||
"Received ingestion request"
|
||||
"Received ingest request"
|
||||
);
|
||||
|
||||
let file_infos = try_join_all(input.files.into_iter().map(|file| {
|
||||
@@ -1,4 +1,4 @@
|
||||
pub mod categories;
|
||||
pub mod ingress;
|
||||
pub mod ingest;
|
||||
pub mod liveness;
|
||||
pub mod readiness;
|
||||
|
||||
@@ -86,6 +86,16 @@ pub struct AppConfig {
|
||||
pub retrieval_strategy: Option<String>,
|
||||
#[serde(default)]
|
||||
pub embedding_backend: EmbeddingBackend,
|
||||
#[serde(default = "default_ingest_max_body_bytes")]
|
||||
pub ingest_max_body_bytes: usize,
|
||||
#[serde(default = "default_ingest_max_files")]
|
||||
pub ingest_max_files: usize,
|
||||
#[serde(default = "default_ingest_max_content_bytes")]
|
||||
pub ingest_max_content_bytes: usize,
|
||||
#[serde(default = "default_ingest_max_context_bytes")]
|
||||
pub ingest_max_context_bytes: usize,
|
||||
#[serde(default = "default_ingest_max_category_bytes")]
|
||||
pub ingest_max_category_bytes: usize,
|
||||
}
|
||||
|
||||
/// Default data directory for persisted assets.
|
||||
@@ -103,6 +113,26 @@ fn default_reranking_enabled() -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn default_ingest_max_body_bytes() -> usize {
|
||||
20_000_000
|
||||
}
|
||||
|
||||
fn default_ingest_max_files() -> usize {
|
||||
5
|
||||
}
|
||||
|
||||
fn default_ingest_max_content_bytes() -> usize {
|
||||
262_144
|
||||
}
|
||||
|
||||
fn default_ingest_max_context_bytes() -> usize {
|
||||
16_384
|
||||
}
|
||||
|
||||
fn default_ingest_max_category_bytes() -> usize {
|
||||
128
|
||||
}
|
||||
|
||||
pub fn ensure_ort_path() {
|
||||
if env::var_os("ORT_DYLIB_PATH").is_some() {
|
||||
return;
|
||||
@@ -157,6 +187,11 @@ impl Default for AppConfig {
|
||||
fastembed_max_length: None,
|
||||
retrieval_strategy: None,
|
||||
embedding_backend: EmbeddingBackend::default(),
|
||||
ingest_max_body_bytes: default_ingest_max_body_bytes(),
|
||||
ingest_max_files: default_ingest_max_files(),
|
||||
ingest_max_content_bytes: default_ingest_max_content_bytes(),
|
||||
ingest_max_context_bytes: default_ingest_max_context_bytes(),
|
||||
ingest_max_category_bytes: default_ingest_max_category_bytes(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
113
common/src/utils/ingest_limits.rs
Normal file
113
common/src/utils/ingest_limits.rs
Normal file
@@ -0,0 +1,113 @@
|
||||
use super::config::AppConfig;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum IngestValidationError {
|
||||
PayloadTooLarge(String),
|
||||
BadRequest(String),
|
||||
}
|
||||
|
||||
pub fn validate_ingest_input(
|
||||
config: &AppConfig,
|
||||
content: Option<&str>,
|
||||
context: &str,
|
||||
category: &str,
|
||||
file_count: usize,
|
||||
) -> Result<(), IngestValidationError> {
|
||||
if file_count > config.ingest_max_files {
|
||||
return Err(IngestValidationError::BadRequest(format!(
|
||||
"Too many files. Maximum allowed is {}",
|
||||
config.ingest_max_files
|
||||
)));
|
||||
}
|
||||
|
||||
if let Some(content) = content {
|
||||
if content.len() > config.ingest_max_content_bytes {
|
||||
return Err(IngestValidationError::PayloadTooLarge(format!(
|
||||
"Content is too large. Maximum allowed is {} bytes",
|
||||
config.ingest_max_content_bytes
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
if context.len() > config.ingest_max_context_bytes {
|
||||
return Err(IngestValidationError::PayloadTooLarge(format!(
|
||||
"Context is too large. Maximum allowed is {} bytes",
|
||||
config.ingest_max_context_bytes
|
||||
)));
|
||||
}
|
||||
|
||||
if category.len() > config.ingest_max_category_bytes {
|
||||
return Err(IngestValidationError::PayloadTooLarge(format!(
|
||||
"Category is too large. Maximum allowed is {} bytes",
|
||||
config.ingest_max_category_bytes
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn validate_ingest_input_rejects_too_many_files() {
|
||||
let config = AppConfig {
|
||||
ingest_max_files: 1,
|
||||
..Default::default()
|
||||
};
|
||||
let result = validate_ingest_input(&config, Some("ok"), "ctx", "cat", 2);
|
||||
|
||||
assert!(matches!(result, Err(IngestValidationError::BadRequest(_))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validate_ingest_input_rejects_oversized_content() {
|
||||
let config = AppConfig {
|
||||
ingest_max_content_bytes: 4,
|
||||
..Default::default()
|
||||
};
|
||||
let result = validate_ingest_input(&config, Some("12345"), "ctx", "cat", 0);
|
||||
|
||||
assert!(matches!(
|
||||
result,
|
||||
Err(IngestValidationError::PayloadTooLarge(_))
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validate_ingest_input_rejects_oversized_context() {
|
||||
let config = AppConfig {
|
||||
ingest_max_context_bytes: 2,
|
||||
..Default::default()
|
||||
};
|
||||
let result = validate_ingest_input(&config, None, "long", "cat", 0);
|
||||
|
||||
assert!(matches!(
|
||||
result,
|
||||
Err(IngestValidationError::PayloadTooLarge(_))
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validate_ingest_input_rejects_oversized_category() {
|
||||
let config = AppConfig {
|
||||
ingest_max_category_bytes: 2,
|
||||
..Default::default()
|
||||
};
|
||||
let result = validate_ingest_input(&config, None, "ok", "long", 0);
|
||||
|
||||
assert!(matches!(
|
||||
result,
|
||||
Err(IngestValidationError::PayloadTooLarge(_))
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validate_ingest_input_accepts_valid_payload() {
|
||||
let config = AppConfig::default();
|
||||
let result = validate_ingest_input(&config, Some("ok"), "ctx", "cat", 1);
|
||||
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod config;
|
||||
pub mod embedding;
|
||||
pub mod ingest_limits;
|
||||
pub mod template_engine;
|
||||
|
||||
@@ -29,6 +29,11 @@ Minne can be configured via environment variables or a `config.yaml` file. Envir
|
||||
| `FASTEMBED_CACHE_DIR` | Model cache directory | `<data_dir>/fastembed` |
|
||||
| `FASTEMBED_SHOW_DOWNLOAD_PROGRESS` | Show progress bar for model downloads | `false` |
|
||||
| `FASTEMBED_MAX_LENGTH` | Max sequence length for FastEmbed models | - |
|
||||
| `INGEST_MAX_BODY_BYTES` | Max request body size for ingest endpoints | `20000000` |
|
||||
| `INGEST_MAX_FILES` | Max files allowed per ingest request | `5` |
|
||||
| `INGEST_MAX_CONTENT_BYTES` | Max `content` field size for ingest requests | `262144` |
|
||||
| `INGEST_MAX_CONTEXT_BYTES` | Max `context` field size for ingest requests | `16384` |
|
||||
| `INGEST_MAX_CATEGORY_BYTES` | Max `category` field size for ingest requests | `128` |
|
||||
|
||||
### S3 Storage (Optional)
|
||||
|
||||
@@ -76,6 +81,13 @@ embedding_backend: "fastembed"
|
||||
# Optional reranking
|
||||
reranking_enabled: true
|
||||
reranking_pool_size: 2
|
||||
|
||||
# Ingest safety limits
|
||||
ingest_max_body_bytes: 20000000
|
||||
ingest_max_files: 5
|
||||
ingest_max_content_bytes: 262144
|
||||
ingest_max_context_bytes: 16384
|
||||
ingest_max_category_bytes: 128
|
||||
```
|
||||
|
||||
## AI Provider Setup
|
||||
|
||||
@@ -35,7 +35,9 @@ where
|
||||
.add_protected_routes(routes::chat::router())
|
||||
.add_protected_routes(routes::content::router())
|
||||
.add_protected_routes(routes::knowledge::router())
|
||||
.add_protected_routes(routes::ingestion::router())
|
||||
.add_protected_routes(routes::ingestion::router(
|
||||
app_state.config.ingest_max_body_bytes,
|
||||
))
|
||||
.add_protected_routes(routes::scratchpad::router())
|
||||
.with_compression()
|
||||
.build()
|
||||
|
||||
@@ -2,9 +2,10 @@ use std::{pin::Pin, time::Duration};
|
||||
|
||||
use axum::{
|
||||
extract::{Query, State},
|
||||
http::StatusCode,
|
||||
response::{
|
||||
sse::{Event, KeepAlive},
|
||||
Html, IntoResponse, Sse,
|
||||
Html, IntoResponse, Response, Sse,
|
||||
},
|
||||
};
|
||||
use axum_typed_multipart::{FieldData, TryFromMultipart, TypedMultipart};
|
||||
@@ -23,6 +24,7 @@ use common::{
|
||||
ingestion_task::{IngestionTask, TaskState},
|
||||
user::User,
|
||||
},
|
||||
utils::ingest_limits::{validate_ingest_input, IngestValidationError},
|
||||
};
|
||||
|
||||
use crate::{
|
||||
@@ -34,30 +36,32 @@ use crate::{
|
||||
AuthSessionType,
|
||||
};
|
||||
|
||||
pub async fn show_ingress_form(
|
||||
pub async fn show_ingest_form(
|
||||
State(state): State<HtmlState>,
|
||||
RequireUser(user): RequireUser,
|
||||
) -> Result<impl IntoResponse, HtmlError> {
|
||||
let user_categories = User::get_user_categories(&user.id, &state.db).await?;
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct ShowIngressFormData {
|
||||
pub struct ShowIngestFormData {
|
||||
user_categories: Vec<String>,
|
||||
}
|
||||
|
||||
Ok(TemplateResponse::new_template(
|
||||
"ingestion_modal.html",
|
||||
ShowIngressFormData { user_categories },
|
||||
ShowIngestFormData { user_categories },
|
||||
))
|
||||
}
|
||||
|
||||
pub async fn hide_ingress_form(
|
||||
pub async fn hide_ingest_form(
|
||||
RequireUser(_user): RequireUser,
|
||||
) -> Result<impl IntoResponse, HtmlError> {
|
||||
Ok(Html(
|
||||
"<a class='btn btn-primary' hx-get='/ingress-form' hx-swap='outerHTML'>Add Content</a>",
|
||||
Ok(
|
||||
Html(
|
||||
"<a class='btn btn-primary' hx-get='/ingest-form' hx-swap='outerHTML'>Add Content</a>",
|
||||
)
|
||||
.into_response(),
|
||||
)
|
||||
.into_response())
|
||||
}
|
||||
|
||||
#[derive(Debug, TryFromMultipart)]
|
||||
@@ -65,34 +69,22 @@ pub struct IngestionParams {
|
||||
pub content: Option<String>,
|
||||
pub context: String,
|
||||
pub category: String,
|
||||
#[form_data(limit = "10000000")] // Adjust limit as needed
|
||||
#[form_data(limit = "20000000")]
|
||||
#[form_data(default)]
|
||||
pub files: Vec<FieldData<NamedTempFile>>,
|
||||
}
|
||||
|
||||
pub async fn process_ingress_form(
|
||||
pub async fn process_ingest_form(
|
||||
State(state): State<HtmlState>,
|
||||
RequireUser(user): RequireUser,
|
||||
TypedMultipart(input): TypedMultipart<IngestionParams>,
|
||||
) -> Result<impl IntoResponse, HtmlError> {
|
||||
#[derive(Serialize)]
|
||||
pub struct IngressFormData {
|
||||
context: String,
|
||||
content: String,
|
||||
category: String,
|
||||
error: String,
|
||||
}
|
||||
|
||||
) -> Result<Response, HtmlError> {
|
||||
if input.content.as_ref().is_none_or(|c| c.len() < 2) && input.files.is_empty() {
|
||||
return Ok(TemplateResponse::new_template(
|
||||
"index/signed_in/ingress_form.html",
|
||||
IngressFormData {
|
||||
context: input.context.clone(),
|
||||
content: input.content.clone().unwrap_or_default(),
|
||||
category: input.category.clone(),
|
||||
error: "You need to either add files or content".to_string(),
|
||||
},
|
||||
));
|
||||
return Ok((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"You need to either add files or content",
|
||||
)
|
||||
.into_response());
|
||||
}
|
||||
|
||||
let content_bytes = input.content.as_ref().map_or(0, |c| c.len());
|
||||
@@ -101,6 +93,22 @@ pub async fn process_ingress_form(
|
||||
let category_bytes = input.category.len();
|
||||
let file_count = input.files.len();
|
||||
|
||||
match validate_ingest_input(
|
||||
&state.config,
|
||||
input.content.as_deref(),
|
||||
&input.context,
|
||||
&input.category,
|
||||
file_count,
|
||||
) {
|
||||
Ok(()) => {}
|
||||
Err(IngestValidationError::PayloadTooLarge(message)) => {
|
||||
return Ok((StatusCode::PAYLOAD_TOO_LARGE, message).into_response());
|
||||
}
|
||||
Err(IngestValidationError::BadRequest(message)) => {
|
||||
return Ok((StatusCode::BAD_REQUEST, message).into_response());
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
user_id = %user.id,
|
||||
has_content,
|
||||
@@ -108,7 +116,7 @@ pub async fn process_ingress_form(
|
||||
context_bytes,
|
||||
category_bytes,
|
||||
file_count,
|
||||
"Received ingestion form submission"
|
||||
"Received ingest form submission"
|
||||
);
|
||||
|
||||
let file_infos = try_join_all(input.files.into_iter().map(|file| {
|
||||
@@ -137,10 +145,10 @@ pub async fn process_ingress_form(
|
||||
tasks: Vec<IngestionTask>,
|
||||
}
|
||||
|
||||
Ok(TemplateResponse::new_template(
|
||||
"dashboard/current_task.html",
|
||||
NewTasksData { tasks },
|
||||
))
|
||||
Ok(
|
||||
TemplateResponse::new_template("dashboard/current_task.html", NewTasksData { tasks })
|
||||
.into_response(),
|
||||
)
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
mod handlers;
|
||||
|
||||
use axum::{extract::FromRef, routing::get, Router};
|
||||
use handlers::{
|
||||
get_task_updates_stream, hide_ingress_form, process_ingress_form, show_ingress_form,
|
||||
};
|
||||
use axum::{extract::DefaultBodyLimit, extract::FromRef, routing::get, Router};
|
||||
use handlers::{get_task_updates_stream, hide_ingest_form, process_ingest_form, show_ingest_form};
|
||||
|
||||
use crate::html_state::HtmlState;
|
||||
|
||||
pub fn router<S>() -> Router<S>
|
||||
pub fn router<S>(max_body_bytes: usize) -> Router<S>
|
||||
where
|
||||
S: Clone + Send + Sync + 'static,
|
||||
HtmlState: FromRef<S>,
|
||||
{
|
||||
Router::new()
|
||||
.route(
|
||||
"/ingress-form",
|
||||
get(show_ingress_form).post(process_ingress_form),
|
||||
"/ingest-form",
|
||||
get(show_ingest_form)
|
||||
.post(process_ingest_form)
|
||||
.layer(DefaultBodyLimit::max(max_body_bytes)),
|
||||
)
|
||||
.route("/task/status-stream", get(get_task_updates_stream))
|
||||
.route("/hide-ingress-form", get(hide_ingress_form))
|
||||
.route("/hide-ingest-form", get(hide_ingest_form))
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
{% block dashboard_header %}
|
||||
<h1 class="text-xl font-extrabold tracking-tight">Dashboard</h1>
|
||||
<button class="nb-btn nb-cta" hx-get="/ingress-form" hx-target="#modal" hx-swap="innerHTML">
|
||||
<button class="nb-btn nb-cta" hx-get="/ingest-form" hx-target="#modal" hx-swap="innerHTML">
|
||||
{% include "icons/send_icon.html" %}
|
||||
<span class="ml-2">Add Content</span>
|
||||
</button>
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
{% block modal_class %}max-w-3xl{% endblock %}
|
||||
|
||||
{% block form_attributes %}
|
||||
hx-post="/ingress-form"
|
||||
hx-post="/ingest-form"
|
||||
enctype="multipart/form-data"
|
||||
{% endblock %}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
</li>
|
||||
{% endfor %}
|
||||
<li>
|
||||
<button class="nb-btn nb-cta w-full flex items-center gap-3 justify-start mt-2" hx-get="/ingress-form"
|
||||
<button class="nb-btn nb-cta w-full flex items-center gap-3 justify-start mt-2" hx-get="/ingest-form"
|
||||
hx-target="#modal" hx-swap="innerHTML">{% include "icons/send_icon.html" %} Add
|
||||
Content</button>
|
||||
</li>
|
||||
|
||||
Reference in New Issue
Block a user