mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-29 19:00:51 +02:00
chore: improve html-router auth, caching, and analytics while centralizing search labels in common.
small fix
This commit is contained in:
@@ -9,7 +9,7 @@ use tracing::{debug, info, warn};
|
||||
use crate::{error::AppError, storage::db::SurrealDbClient};
|
||||
|
||||
const INDEX_POLL_INTERVAL: Duration = Duration::from_millis(50);
|
||||
const INDEX_BUILD_TIMEOUT: Duration = Duration::from_secs(30 * 60);
|
||||
const INDEX_BUILD_TIMEOUT: Duration = Duration::from_mins(30);
|
||||
const FTS_ANALYZER_NAME: &str = "app_en_fts_analyzer";
|
||||
|
||||
/// HNSW index options used by runtime index creation (includes CONCURRENTLY).
|
||||
@@ -537,8 +537,7 @@ async fn poll_index_build_status(
|
||||
INDEX_BUILD_TIMEOUT,
|
||||
last_snapshot
|
||||
.as_ref()
|
||||
.map(|snapshot| snapshot.status.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.map_or("unknown", |snapshot| snapshot.status.as_str())
|
||||
))
|
||||
.with_context(|| format!("index {index_name} on table {table} did not become ready"));
|
||||
}
|
||||
|
||||
@@ -62,12 +62,22 @@ impl Analytics {
|
||||
}
|
||||
|
||||
pub async fn increment_page_loads(db: &SurrealDbClient) -> Result<Self, AppError> {
|
||||
Self::record_page_view(db, false).await
|
||||
}
|
||||
|
||||
/// Records a page view, optionally counting the visitor as new.
|
||||
pub async fn record_page_view(
|
||||
db: &SurrealDbClient,
|
||||
is_new_visitor: bool,
|
||||
) -> Result<Self, AppError> {
|
||||
let visitor_delta = i64::from(is_new_visitor);
|
||||
let updated: Option<Self> = db
|
||||
.client
|
||||
.query(
|
||||
"UPSERT type::thing('analytics', $id) SET page_loads = (page_loads ?? 0) + 1, visitors = visitors ?? 0 RETURN AFTER",
|
||||
"UPSERT type::thing('analytics', $id) SET page_loads = (page_loads ?? 0) + 1, visitors = (visitors ?? 0) + $visitor_delta RETURN AFTER",
|
||||
)
|
||||
.bind(("id", Self::RECORD_ID))
|
||||
.bind(("visitor_delta", visitor_delta))
|
||||
.await?
|
||||
.take(0)?;
|
||||
|
||||
@@ -281,6 +291,23 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_record_page_view() -> anyhow::Result<()> {
|
||||
let namespace = "test_ns";
|
||||
let database = &Uuid::new_v4().to_string();
|
||||
let db = SurrealDbClient::memory(namespace, database).await?;
|
||||
|
||||
let first_view = Analytics::record_page_view(&db, true).await?;
|
||||
assert_eq!(first_view.visitors, 1);
|
||||
assert_eq!(first_view.page_loads, 1);
|
||||
|
||||
let returning_view = Analytics::record_page_view(&db, false).await?;
|
||||
assert_eq!(returning_view.visitors, 1);
|
||||
assert_eq!(returning_view.page_loads, 2);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_current_nonexistent() -> anyhow::Result<()> {
|
||||
// Setup in-memory database for testing
|
||||
|
||||
@@ -26,6 +26,7 @@ pub struct SystemSettings {
|
||||
|
||||
/// Partial update for singleton system settings without cloning unchanged fields.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
#[allow(clippy::module_name_repetitions)]
|
||||
pub struct SystemSettingsPatch {
|
||||
pub registrations_enabled: Option<bool>,
|
||||
pub require_email_verification: Option<bool>,
|
||||
@@ -92,7 +93,6 @@ impl SystemSettingsPatch {
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub async fn apply(self, db: &SurrealDbClient) -> Result<SystemSettings, AppError> {
|
||||
let mut current = SystemSettings::get_current(db).await?;
|
||||
self.apply_to(&mut current);
|
||||
@@ -103,6 +103,7 @@ impl SystemSettingsPatch {
|
||||
impl SystemSettings {
|
||||
pub const RECORD_ID: &'static str = "current";
|
||||
|
||||
#[allow(clippy::result_large_err)]
|
||||
fn validate(&self) -> Result<(), AppError> {
|
||||
if self.embedding_dimensions == 0 {
|
||||
return Err(AppError::Validation(
|
||||
@@ -137,13 +138,11 @@ impl SystemSettings {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub async fn get_current(db: &SurrealDbClient) -> Result<Self, AppError> {
|
||||
let settings: Option<Self> = db.get_item(Self::RECORD_ID).await?;
|
||||
settings.ok_or(AppError::NotFound("system settings not found".into()))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub async fn update(db: &SurrealDbClient, changes: Self) -> Result<Self, AppError> {
|
||||
Self::update_with_mode(db, changes, UpdateMode::User).await
|
||||
}
|
||||
@@ -176,7 +175,6 @@ impl SystemSettings {
|
||||
/// Syncs SystemSettings with the active embedding provider's properties.
|
||||
/// Updates embedding_backend, embedding_model, and embedding_dimensions if they differ.
|
||||
/// Returns true if any settings were changed.
|
||||
#[must_use]
|
||||
pub async fn sync_from_embedding_provider(
|
||||
db: &SurrealDbClient,
|
||||
provider: &crate::utils::embedding::EmbeddingProvider,
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::str::FromStr;
|
||||
|
||||
use surrealdb::opt::PatchOp;
|
||||
use surrealdb::RecordId;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{error::AppError, storage::db::SurrealDbClient, stored_object};
|
||||
@@ -194,6 +198,169 @@ impl TextContent {
|
||||
.take(0)
|
||||
.map_err(AppError::Database)
|
||||
}
|
||||
|
||||
/// Builds a fallback display label for a source id when no matching content row exists.
|
||||
#[must_use]
|
||||
pub fn fallback_source_label(source_id: &str) -> String {
|
||||
format!("Text snippet: {}", source_id_suffix(source_id))
|
||||
}
|
||||
|
||||
/// Resolves human-readable labels for the given source ids owned by `user_id`.
|
||||
pub async fn resolve_source_labels(
|
||||
db: &SurrealDbClient,
|
||||
user_id: &str,
|
||||
source_ids: impl IntoIterator<Item = impl AsRef<str>>,
|
||||
) -> Result<HashMap<String, String>, AppError> {
|
||||
let source_ids: HashSet<String> = source_ids
|
||||
.into_iter()
|
||||
.map(|id| id.as_ref().to_string())
|
||||
.collect();
|
||||
|
||||
if source_ids.is_empty() {
|
||||
return Ok(HashMap::new());
|
||||
}
|
||||
|
||||
let record_ids: Vec<RecordId> = source_ids
|
||||
.iter()
|
||||
.filter_map(|id| {
|
||||
if id.contains(':') {
|
||||
RecordId::from_str(id).ok()
|
||||
} else {
|
||||
Some(RecordId::from_table_key(Self::table_name(), id))
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut response = db
|
||||
.client
|
||||
.query(
|
||||
"SELECT id, url_info, file_info, context, category, text FROM type::table($table_name) WHERE user_id = $user_id AND id INSIDE $record_ids",
|
||||
)
|
||||
.bind(("table_name", Self::table_name()))
|
||||
.bind(("user_id", user_id.to_owned()))
|
||||
.bind(("record_ids", record_ids))
|
||||
.await
|
||||
.map_err(AppError::Database)?;
|
||||
|
||||
let contents: Vec<SourceLabelRow> = response.take(0).map_err(AppError::Database)?;
|
||||
|
||||
tracing::debug!(
|
||||
source_id_count = source_ids.len(),
|
||||
label_row_count = contents.len(),
|
||||
"resolved source labels"
|
||||
);
|
||||
|
||||
let mut labels = HashMap::new();
|
||||
for content in contents {
|
||||
let label = build_source_label(&content);
|
||||
labels.insert(content.id.clone(), label.clone());
|
||||
labels.insert(
|
||||
format!("{}:{}", Self::table_name(), content.id),
|
||||
label,
|
||||
);
|
||||
}
|
||||
|
||||
Ok(labels)
|
||||
}
|
||||
}
|
||||
|
||||
const SOURCE_LABEL_MAX_CHARS: usize = 80;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct SourceLabelRow {
|
||||
#[serde(deserialize_with = "deserialize_flexible_id")]
|
||||
id: String,
|
||||
#[serde(default)]
|
||||
url_info: Option<UrlInfo>,
|
||||
#[serde(default)]
|
||||
file_info: Option<FileInfo>,
|
||||
#[serde(default)]
|
||||
context: Option<String>,
|
||||
#[serde(default)]
|
||||
category: String,
|
||||
#[serde(default)]
|
||||
text: String,
|
||||
}
|
||||
|
||||
/// Returns the last eight characters of `source_id` (or all of it when shorter).
///
/// The cut point is located by walking characters from the end rather than by
/// subtracting a byte count from the length: slicing a `str` at a byte offset that
/// falls inside a multi-byte UTF-8 sequence panics, and source ids are not
/// guaranteed to be ASCII. For ASCII ids the result is the last eight bytes.
fn source_id_suffix(source_id: &str) -> String {
    const SUFFIX_CHARS: usize = 8;

    // Byte index of the eighth character counted from the end; fewer than eight
    // characters means the whole id is the suffix.
    let start = source_id
        .char_indices()
        .rev()
        .nth(SUFFIX_CHARS - 1)
        .map_or(0, |(idx, _)| idx);

    source_id[start..].to_string()
}
|
||||
|
||||
/// Limits `value` to at most `max_chars` characters, appending `…` when text was cut.
///
/// Counting is done in `char`s, so the cut always lands on a UTF-8 boundary. A value
/// with `max_chars` characters or fewer is returned unchanged. The ellipsis is added
/// on top of the kept characters, so a truncated result is `max_chars + 1`
/// characters long. With `max_chars == 0` a non-empty value becomes just `…` and
/// an empty value stays empty.
fn truncate_with_ellipsis(value: &str, max_chars: usize) -> String {
    const ELLIPSIS: &str = "…";

    // `nth(max_chars)` yields the first character that does not fit; its byte
    // offset is where the kept prefix ends. `None` means everything fits.
    match value.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}{}", &value[..cut], ELLIPSIS),
        None => value.to_string(),
    }
}
|
||||
|
||||
fn first_non_empty_line(text: &str, max_chars: usize) -> Option<String> {
|
||||
text.lines().find_map(|line| {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(truncate_with_ellipsis(trimmed, max_chars))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn build_source_label(row: &SourceLabelRow) -> String {
|
||||
if let Some(url_info) = row.url_info.as_ref() {
|
||||
let title = url_info.title.trim();
|
||||
if !title.is_empty() {
|
||||
return title.to_string();
|
||||
}
|
||||
|
||||
let url = url_info.url.trim();
|
||||
if !url.is_empty() {
|
||||
return url.to_string();
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(file_info) = row.file_info.as_ref() {
|
||||
let name = file_info.file_name.trim();
|
||||
if !name.is_empty() {
|
||||
return name.to_string();
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(context) = row.context.as_ref() {
|
||||
let trimmed = context.trim();
|
||||
if !trimmed.is_empty() {
|
||||
return truncate_with_ellipsis(trimmed, SOURCE_LABEL_MAX_CHARS);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(text_label) = first_non_empty_line(&row.text, SOURCE_LABEL_MAX_CHARS) {
|
||||
return text_label;
|
||||
}
|
||||
|
||||
let category = row.category.trim();
|
||||
if !category.is_empty() {
|
||||
return truncate_with_ellipsis(category, SOURCE_LABEL_MAX_CHARS);
|
||||
}
|
||||
|
||||
TextContent::fallback_source_label(&row.id)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -444,4 +611,36 @@ mod tests {
|
||||
assert!(row.score.is_finite());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_resolve_source_labels_uses_url_title() -> anyhow::Result<()> {
|
||||
let db = setup_test_db_with_runtime_indexes().await?;
|
||||
let user_id = "label_user";
|
||||
|
||||
let content = TextContent::new(
|
||||
"body".to_string(),
|
||||
None,
|
||||
"notes".to_string(),
|
||||
None,
|
||||
Some(UrlInfo {
|
||||
url: "https://example.com/doc".to_string(),
|
||||
title: "Example Document".to_string(),
|
||||
image_id: String::new(),
|
||||
}),
|
||||
user_id.to_string(),
|
||||
);
|
||||
db.store_item(content.clone()).await?;
|
||||
|
||||
let labels = TextContent::resolve_source_labels(&db, user_id, [content.id.clone()]).await?;
|
||||
|
||||
assert_eq!(
|
||||
labels.get(&content.id),
|
||||
Some(&"Example Document".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
labels.get(&format!("text_content:{}", content.id)),
|
||||
Some(&"Example Document".to_string())
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -338,6 +338,8 @@ pub fn get_config() -> Result<AppConfig, ConfigError> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::expect_used)]
|
||||
|
||||
use super::{ParseRetrievalStrategyError, RetrievalStrategy};
|
||||
#[test]
|
||||
fn retrieval_strategy_defaults_to_default() {
|
||||
|
||||
@@ -15,6 +15,7 @@ use crate::{
|
||||
utils::config::AppConfig,
|
||||
};
|
||||
|
||||
#[allow(clippy::module_name_repetitions)]
|
||||
pub use crate::utils::config::{EmbeddingBackend, ParseEmbeddingBackendError};
|
||||
|
||||
/// Wrapper around the chosen embedding backend.
|
||||
@@ -431,6 +432,8 @@ pub async fn generate_embedding_with_params(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::expect_used)]
|
||||
|
||||
use super::{EmbeddingBackend, ParseEmbeddingBackendError};
|
||||
use crate::storage::types::system_settings::SystemSettings;
|
||||
use serde_json::json;
|
||||
|
||||
@@ -29,7 +29,10 @@ pub fn validate_ingest_input(
|
||||
category: &str,
|
||||
file_count: usize,
|
||||
) -> Result<(), IngestValidationError> {
|
||||
let text_field_bytes = content.map(str::len).unwrap_or(0) + ctx.len() + category.len();
|
||||
let content_bytes = content.map_or(0, str::len);
|
||||
let text_field_bytes = content_bytes
|
||||
.saturating_add(ctx.len())
|
||||
.saturating_add(category.len());
|
||||
if text_field_bytes > config.ingest_max_body_bytes {
|
||||
return Err(IngestValidationError::PayloadTooLarge(format!(
|
||||
"request text fields exceed maximum allowed body size of {} bytes",
|
||||
|
||||
Reference in New Issue
Block a user