chore: improve html-router auth, caching, and analytics while centralizing search labels in common.

small fix
This commit is contained in:
Per Stark
2026-05-29 14:42:20 +02:00
parent d3443d4153
commit 2aa92b6ad7
27 changed files with 510 additions and 344 deletions
+2 -3
View File
@@ -9,7 +9,7 @@ use tracing::{debug, info, warn};
use crate::{error::AppError, storage::db::SurrealDbClient};
const INDEX_POLL_INTERVAL: Duration = Duration::from_millis(50);
const INDEX_BUILD_TIMEOUT: Duration = Duration::from_secs(30 * 60);
const INDEX_BUILD_TIMEOUT: Duration = Duration::from_mins(30);
const FTS_ANALYZER_NAME: &str = "app_en_fts_analyzer";
/// HNSW index options used by runtime index creation (includes CONCURRENTLY).
@@ -537,8 +537,7 @@ async fn poll_index_build_status(
INDEX_BUILD_TIMEOUT,
last_snapshot
.as_ref()
.map(|snapshot| snapshot.status.as_str())
.unwrap_or("unknown")
.map_or("unknown", |snapshot| snapshot.status.as_str())
))
.with_context(|| format!("index {index_name} on table {table} did not become ready"));
}
+28 -1
View File
@@ -62,12 +62,22 @@ impl Analytics {
}
/// Records a page view that does not count as a new visitor.
///
/// Thin wrapper that forwards to [`Self::record_page_view`] with
/// `is_new_visitor` set to `false`.
///
/// # Errors
/// Propagates whatever error `record_page_view` returns.
pub async fn increment_page_loads(db: &SurrealDbClient) -> Result<Self, AppError> {
Self::record_page_view(db, false).await
}
/// Records a page view, optionally counting the visitor as new.
pub async fn record_page_view(
db: &SurrealDbClient,
is_new_visitor: bool,
) -> Result<Self, AppError> {
let visitor_delta = i64::from(is_new_visitor);
let updated: Option<Self> = db
.client
.query(
"UPSERT type::thing('analytics', $id) SET page_loads = (page_loads ?? 0) + 1, visitors = visitors ?? 0 RETURN AFTER",
"UPSERT type::thing('analytics', $id) SET page_loads = (page_loads ?? 0) + 1, visitors = (visitors ?? 0) + $visitor_delta RETURN AFTER",
)
.bind(("id", Self::RECORD_ID))
.bind(("visitor_delta", visitor_delta))
.await?
.take(0)?;
@@ -281,6 +291,23 @@ mod tests {
Ok(())
}
/// A new visitor bumps both counters; a returning visitor only bumps `page_loads`.
#[tokio::test]
async fn test_record_page_view() -> anyhow::Result<()> {
    // Each run uses a freshly generated UUID as the database name.
    let database = Uuid::new_v4().to_string();
    let db = SurrealDbClient::memory("test_ns", &database).await?;

    // First view is flagged as a new visitor.
    let after_new_visitor = Analytics::record_page_view(&db, true).await?;
    assert_eq!(after_new_visitor.visitors, 1);
    assert_eq!(after_new_visitor.page_loads, 1);

    // Second view is not flagged, so the visitor count must stay put.
    let after_returning_visitor = Analytics::record_page_view(&db, false).await?;
    assert_eq!(after_returning_visitor.visitors, 1);
    assert_eq!(after_returning_visitor.page_loads, 2);

    Ok(())
}
#[tokio::test]
async fn test_get_current_nonexistent() -> anyhow::Result<()> {
// Setup in-memory database for testing
+2 -4
View File
@@ -26,6 +26,7 @@ pub struct SystemSettings {
/// Partial update for singleton system settings without cloning unchanged fields.
#[derive(Debug, Default, Clone)]
#[allow(clippy::module_name_repetitions)]
pub struct SystemSettingsPatch {
pub registrations_enabled: Option<bool>,
pub require_email_verification: Option<bool>,
@@ -92,7 +93,6 @@ impl SystemSettingsPatch {
}
}
#[must_use]
pub async fn apply(self, db: &SurrealDbClient) -> Result<SystemSettings, AppError> {
let mut current = SystemSettings::get_current(db).await?;
self.apply_to(&mut current);
@@ -103,6 +103,7 @@ impl SystemSettingsPatch {
impl SystemSettings {
pub const RECORD_ID: &'static str = "current";
#[allow(clippy::result_large_err)]
fn validate(&self) -> Result<(), AppError> {
if self.embedding_dimensions == 0 {
return Err(AppError::Validation(
@@ -137,13 +138,11 @@ impl SystemSettings {
Ok(())
}
/// Loads the singleton settings record stored under `Self::RECORD_ID`.
///
/// # Errors
/// Returns `AppError::NotFound` when no record exists, and propagates any
/// error reported by `db.get_item`.
pub async fn get_current(db: &SurrealDbClient) -> Result<Self, AppError> {
    let settings: Option<Self> = db.get_item(Self::RECORD_ID).await?;
    // Build the error lazily so the message is only allocated on the miss path.
    settings.ok_or_else(|| AppError::NotFound("system settings not found".into()))
}
/// Applies `changes` by delegating to `update_with_mode` with `UpdateMode::User`.
///
/// # Errors
/// Propagates whatever error `update_with_mode` returns.
pub async fn update(db: &SurrealDbClient, changes: Self) -> Result<Self, AppError> {
    Self::update_with_mode(db, changes, UpdateMode::User).await
}
@@ -176,7 +175,6 @@ impl SystemSettings {
/// Syncs SystemSettings with the active embedding provider's properties.
/// Updates embedding_backend, embedding_model, and embedding_dimensions if they differ.
/// Returns true if any settings were changed.
#[must_use]
pub async fn sync_from_embedding_provider(
db: &SurrealDbClient,
provider: &crate::utils::embedding::EmbeddingProvider,
+199
View File
@@ -1,4 +1,8 @@
use std::collections::{HashMap, HashSet};
use std::str::FromStr;
use surrealdb::opt::PatchOp;
use surrealdb::RecordId;
use uuid::Uuid;
use crate::{error::AppError, storage::db::SurrealDbClient, stored_object};
@@ -194,6 +198,169 @@ impl TextContent {
.take(0)
.map_err(AppError::Database)
}
/// Produces the generic label used when a source id has no matching content row.
///
/// The label is the fixed prefix `Text snippet: ` followed by the tail of
/// `source_id` as returned by `source_id_suffix`.
#[must_use]
pub fn fallback_source_label(source_id: &str) -> String {
    let suffix = source_id_suffix(source_id);
    format!("Text snippet: {suffix}")
}
/// Resolves human-readable labels for the given source ids owned by `user_id`.
///
/// Ids are de-duplicated first. An id containing `:` is parsed as a full record
/// id (and silently dropped when parsing fails); any other id is treated as a
/// key in this type's table.
///
/// Each matching row contributes two entries to the returned map: one keyed by
/// the row id as deserialized, and one keyed by `<table>:<id>`, so callers can
/// look up either form. Ids without a matching row are absent from the map.
///
/// # Errors
/// Returns `AppError::Database` when the query or its deserialization fails.
pub async fn resolve_source_labels(
    db: &SurrealDbClient,
    user_id: &str,
    source_ids: impl IntoIterator<Item = impl AsRef<str>>,
) -> Result<HashMap<String, String>, AppError> {
    let source_ids: HashSet<String> = source_ids
        .into_iter()
        .map(|id| id.as_ref().to_string())
        .collect();

    let record_ids: Vec<RecordId> = source_ids
        .iter()
        .filter_map(|id| {
            if id.contains(':') {
                RecordId::from_str(id).ok()
            } else {
                Some(RecordId::from_table_key(Self::table_name(), id))
            }
        })
        .collect();

    // Covers both "no ids supplied" and "no id could be parsed": an empty
    // `INSIDE` list cannot match any row, so skip the database round trip.
    if record_ids.is_empty() {
        return Ok(HashMap::new());
    }

    let mut response = db
        .client
        .query(
            "SELECT id, url_info, file_info, context, category, text FROM type::table($table_name) WHERE user_id = $user_id AND id INSIDE $record_ids",
        )
        .bind(("table_name", Self::table_name()))
        .bind(("user_id", user_id.to_owned()))
        .bind(("record_ids", record_ids))
        .await
        .map_err(AppError::Database)?;

    let contents: Vec<SourceLabelRow> = response.take(0).map_err(AppError::Database)?;

    tracing::debug!(
        source_id_count = source_ids.len(),
        label_row_count = contents.len(),
        "resolved source labels"
    );

    // Two keys per row: the bare id and the table-qualified id.
    let mut labels = HashMap::with_capacity(contents.len().saturating_mul(2));
    for content in contents {
        let label = build_source_label(&content);
        let qualified_id = format!("{}:{}", Self::table_name(), content.id);
        labels.insert(qualified_id, label.clone());
        labels.insert(content.id, label);
    }

    Ok(labels)
}
}
/// Maximum number of characters kept when a label is derived from free-form
/// text (context, first non-empty text line, or category) in `build_source_label`.
const SOURCE_LABEL_MAX_CHARS: usize = 80;
/// Projection of a content row holding only the fields `build_source_label`
/// reads; populated from the query in `resolve_source_labels`.
///
/// Every field except `id` falls back to its `Default` when missing from the row.
#[derive(Deserialize)]
struct SourceLabelRow {
/// Row identifier as a string.
// NOTE(review): presumably `deserialize_flexible_id` accepts either a record
// id or a plain string and yields the bare key — confirm against the helper.
#[serde(deserialize_with = "deserialize_flexible_id")]
id: String,
/// URL metadata; its title, then its URL, are the first label candidates.
#[serde(default)]
url_info: Option<UrlInfo>,
/// File metadata; its file name is the next label candidate.
#[serde(default)]
file_info: Option<FileInfo>,
/// Optional context text, used (truncated) when non-blank.
#[serde(default)]
context: Option<String>,
/// Category text, used (truncated) as the last candidate before the fallback label.
#[serde(default)]
category: String,
/// Body text; its first non-empty line is used (truncated) as a label candidate.
#[serde(default)]
text: String,
}
/// Returns the trailing part of `source_id`, at most 8 bytes long.
///
/// Ids of 8 bytes or fewer are returned whole. For longer ids the cut is made
/// 8 bytes from the end; if that position falls inside a multi-byte UTF-8
/// character, the cut moves forward to the next character boundary so the
/// slice can never panic (the result is then slightly shorter than 8 bytes).
fn source_id_suffix(source_id: &str) -> String {
    const SUFFIX_BYTES: usize = 8;

    let mut start = source_id.len().saturating_sub(SUFFIX_BYTES);
    // `source_id.len()` is always a boundary, so this loop terminates.
    while !source_id.is_char_boundary(start) {
        start += 1;
    }
    source_id[start..].to_string()
}
/// Shortens `value` to at most `max_chars` characters, appending an ellipsis
/// when anything was cut off.
///
/// Counting is done in `char`s, not bytes, so multi-byte text is never split
/// mid-character. Values that already fit are returned unchanged. With
/// `max_chars == 0`, an empty value stays empty and any other value collapses
/// to just the ellipsis.
fn truncate_with_ellipsis(value: &str, max_chars: usize) -> String {
    // Written as an escape so the marker survives tools that strip non-ASCII text.
    const ELLIPSIS: &str = "\u{2026}";

    // The byte offset of the character at index `max_chars` is where the kept
    // prefix ends; if no such character exists, the value already fits.
    match value.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}{}", &value[..cut], ELLIPSIS),
        None => value.to_string(),
    }
}
/// Finds the first line of `text` that is not blank after trimming and returns
/// it truncated to `max_chars` via `truncate_with_ellipsis`.
///
/// Returns `None` when every line is empty or whitespace-only.
fn first_non_empty_line(text: &str, max_chars: usize) -> Option<String> {
    text.lines()
        .map(str::trim)
        .find(|candidate| !candidate.is_empty())
        .map(|candidate| truncate_with_ellipsis(candidate, max_chars))
}
/// Picks the display label for a content row.
///
/// Candidates are tried in this order, and the first non-blank one wins:
/// 1. URL title, then URL (returned untruncated)
/// 2. file name (returned untruncated)
/// 3. context, truncated to `SOURCE_LABEL_MAX_CHARS`
/// 4. first non-empty line of the text, truncated likewise
/// 5. category, truncated likewise
///
/// If none apply, the generic fallback label built from the row id is used.
fn build_source_label(row: &SourceLabelRow) -> String {
    /// Trims `candidate` and discards it when nothing is left.
    fn non_blank(candidate: &str) -> Option<&str> {
        let trimmed = candidate.trim();
        (!trimmed.is_empty()).then_some(trimmed)
    }

    if let Some(info) = &row.url_info {
        if let Some(label) = non_blank(&info.title).or_else(|| non_blank(&info.url)) {
            return label.to_string();
        }
    }

    if let Some(file_name) = row
        .file_info
        .as_ref()
        .and_then(|info| non_blank(&info.file_name))
    {
        return file_name.to_string();
    }

    if let Some(context) = row.context.as_deref().and_then(non_blank) {
        return truncate_with_ellipsis(context, SOURCE_LABEL_MAX_CHARS);
    }

    if let Some(first_line) = first_non_empty_line(&row.text, SOURCE_LABEL_MAX_CHARS) {
        return first_line;
    }

    match non_blank(&row.category) {
        Some(category) => truncate_with_ellipsis(category, SOURCE_LABEL_MAX_CHARS),
        None => TextContent::fallback_source_label(&row.id),
    }
}
#[cfg(test)]
@@ -444,4 +611,36 @@ mod tests {
assert!(row.score.is_finite());
Ok(())
}
/// A stored item with a URL title is labelled by that title, and the label is
/// reachable under both the bare id and the `text_content:`-qualified id.
#[tokio::test]
async fn test_resolve_source_labels_uses_url_title() -> anyhow::Result<()> {
    const EXPECTED_LABEL: &str = "Example Document";

    let db = setup_test_db_with_runtime_indexes().await?;
    let owner = "label_user";

    let url_info = UrlInfo {
        url: "https://example.com/doc".to_string(),
        title: EXPECTED_LABEL.to_string(),
        image_id: String::new(),
    };
    let stored = TextContent::new(
        "body".to_string(),
        None,
        "notes".to_string(),
        None,
        Some(url_info),
        owner.to_string(),
    );
    db.store_item(stored.clone()).await?;

    let labels = TextContent::resolve_source_labels(&db, owner, [stored.id.clone()]).await?;

    // Lookup by bare id.
    assert_eq!(
        labels.get(&stored.id).map(String::as_str),
        Some(EXPECTED_LABEL)
    );
    // Lookup by table-qualified id.
    let qualified_id = format!("text_content:{}", stored.id);
    assert_eq!(
        labels.get(&qualified_id).map(String::as_str),
        Some(EXPECTED_LABEL)
    );

    Ok(())
}
}
+2
View File
@@ -338,6 +338,8 @@ pub fn get_config() -> Result<AppConfig, ConfigError> {
#[cfg(test)]
mod tests {
#![allow(clippy::expect_used)]
use super::{ParseRetrievalStrategyError, RetrievalStrategy};
#[test]
fn retrieval_strategy_defaults_to_default() {
+3
View File
@@ -15,6 +15,7 @@ use crate::{
utils::config::AppConfig,
};
#[allow(clippy::module_name_repetitions)]
pub use crate::utils::config::{EmbeddingBackend, ParseEmbeddingBackendError};
/// Wrapper around the chosen embedding backend.
@@ -431,6 +432,8 @@ pub async fn generate_embedding_with_params(
#[cfg(test)]
mod tests {
#![allow(clippy::expect_used)]
use super::{EmbeddingBackend, ParseEmbeddingBackendError};
use crate::storage::types::system_settings::SystemSettings;
use serde_json::json;
+4 -1
View File
@@ -29,7 +29,10 @@ pub fn validate_ingest_input(
category: &str,
file_count: usize,
) -> Result<(), IngestValidationError> {
let text_field_bytes = content.map(str::len).unwrap_or(0) + ctx.len() + category.len();
let content_bytes = content.map_or(0, str::len);
let text_field_bytes = content_bytes
.saturating_add(ctx.len())
.saturating_add(category.len());
if text_field_bytes > config.ingest_max_body_bytes {
return Err(IngestValidationError::PayloadTooLarge(format!(
"request text fields exceed maximum allowed body size of {} bytes",