feat: quick search knowledge entities

This commit is contained in:
Per Stark
2025-10-16 20:08:01 +02:00
parent 199186e5a3
commit 7332347f1a
4 changed files with 232 additions and 68 deletions

View File

@@ -40,6 +40,38 @@ impl From<String> for KnowledgeEntityType {
}
}
/// One row produced by a full-text search over the `knowledge_entity` table.
///
/// Besides the stored entity columns it carries the values computed by the
/// search query itself: a relevance `score` and optional highlighted copies of
/// `name` and `description`.
#[derive(Debug, Serialize, Deserialize)]
pub struct KnowledgeEntitySearchResult {
    /// Record identifier, read through `deserialize_flexible_id`.
    #[serde(deserialize_with = "deserialize_flexible_id")]
    pub id: String,
    /// Creation timestamp; uses the `Default` value of `DateTime<Utc>` when
    /// the field is missing from the input.
    #[serde(default)]
    #[serde(serialize_with = "serialize_datetime", deserialize_with = "deserialize_datetime")]
    pub created_at: DateTime<Utc>,
    /// Last-update timestamp; same (de)serialization rules as `created_at`.
    #[serde(default)]
    #[serde(serialize_with = "serialize_datetime", deserialize_with = "deserialize_datetime")]
    pub updated_at: DateTime<Utc>,
    /// Identifier of the source this entity belongs to.
    pub source_id: String,
    /// Entity name (one of the two searched fields).
    pub name: String,
    /// Entity description (the other searched field).
    pub description: String,
    /// Kind of entity.
    pub entity_type: KnowledgeEntityType,
    /// Free-form JSON metadata; `None` when absent.
    #[serde(default)]
    pub metadata: Option<serde_json::Value>,
    /// Owner of the entity; searches are filtered on this value.
    pub user_id: String,
    /// Relevance score: the sum of the name and description match scores,
    /// each counted as 0 when that field did not match.
    pub score: f32,
    /// `name` with matched terms wrapped in `<b>`…`</b>`, when provided.
    // NOTE(review): the text around the markers is presumably not HTML-escaped
    // by the database — confirm before rendering it as raw HTML.
    #[serde(default)]
    pub highlighted_name: Option<String>,
    /// `description` with matched terms wrapped in `<b>`…`</b>`, when provided.
    #[serde(default)]
    pub highlighted_description: Option<String>,
}
stored_object!(KnowledgeEntity, "knowledge_entity", {
source_id: String,
name: String,
@@ -75,6 +107,50 @@ impl KnowledgeEntity {
}
}
/// Runs a full-text search over the `knowledge_entity` table for one user.
///
/// `search_terms` is matched against both `name` and `description`; a row
/// qualifies when either field matches and its `user_id` equals `user_id`.
/// Each row's score is the sum of the two per-field match scores (a missing
/// score counts as 0). Rows come back ordered by descending score, capped at
/// `limit`, with `<b>`…`</b>` highlighted copies of both searched fields.
///
/// # Errors
///
/// Returns an [`AppError`] when the query fails or when the result of the
/// first statement cannot be deserialized into
/// [`KnowledgeEntitySearchResult`] values.
pub async fn search(
    db: &SurrealDbClient,
    search_terms: &str,
    user_id: &str,
    limit: usize,
) -> Result<Vec<KnowledgeEntitySearchResult>, AppError> {
    // The `@0@` / `@1@` match references tie each predicate to the
    // corresponding `search::score(n)` / `search::highlight(.., n)` call.
    // NOTE(review): presumably relies on full-text indexes on `name` and
    // `description` defined elsewhere — confirm against the schema.
    const QUERY: &str = r#"
SELECT
id,
created_at,
updated_at,
source_id,
name,
description,
entity_type,
metadata,
user_id,
search::highlight('<b>', '</b>', 0) AS highlighted_name,
search::highlight('<b>', '</b>', 1) AS highlighted_description,
(
IF search::score(0) != NONE THEN search::score(0) ELSE 0 END +
IF search::score(1) != NONE THEN search::score(1) ELSE 0 END
) AS score
FROM knowledge_entity
WHERE
(
name @0@ $terms OR
description @1@ $terms
)
AND user_id = $user_id
ORDER BY score DESC
LIMIT $limit;
"#;

    let mut response = db
        .client
        .query(QUERY)
        .bind(("terms", search_terms.to_owned()))
        .bind(("user_id", user_id.to_owned()))
        .bind(("limit", limit))
        .await?;

    // Only one statement is sent, so its rows are at result index 0.
    let matches: Vec<KnowledgeEntitySearchResult> = response.take(0)?;
    Ok(matches)
}
pub async fn delete_by_source_id(
source_id: &str,
db_client: &SurrealDbClient,

File diff suppressed because one or more lines are too long

View File

@@ -6,9 +6,11 @@ use axum::{
};
use common::storage::types::{
conversation::Conversation,
knowledge_entity::{KnowledgeEntity, KnowledgeEntitySearchResult},
text_content::{TextContent, TextContentSearchResult},
user::User,
};
use futures::future::try_join;
use serde::{de, Deserialize, Deserializer, Serialize};
use crate::{
@@ -43,9 +45,19 @@ pub async fn search_result_handler(
Query(params): Query<SearchParams>,
RequireUser(user): RequireUser,
) -> Result<impl IntoResponse, HtmlError> {
/// Template-facing wrapper that lets text-content hits and knowledge-entity
/// hits share one result list. Exactly one of `text_content` /
/// `knowledge_entity` is populated by the handler, selected by `result_type`.
#[derive(Serialize)]
struct SearchResultForTemplate {
/// Discriminator the template branches on: `"text_content"` or
/// `"knowledge_entity"`.
result_type: String,
/// Copy of the wrapped result's score, used to sort the merged list.
score: f32,
/// Present only for text-content hits; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
text_content: Option<TextContentSearchResult>,
/// Present only for knowledge-entity hits; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
knowledge_entity: Option<KnowledgeEntitySearchResult>,
}
#[derive(Serialize)]
pub struct AnswerData {
search_result: Vec<TextContentSearchResult>,
search_result: Vec<SearchResultForTemplate>,
query_param: String,
user: User,
conversation_archive: Vec<Conversation>,
@@ -56,17 +68,45 @@ pub async fn search_result_handler(
if let Some(actual_query) = params.query {
let trimmed_query = actual_query.trim();
if trimmed_query.is_empty() {
(Vec::new(), String::new())
(Vec::<SearchResultForTemplate>::new(), String::new())
} else {
match TextContent::search(&state.db, trimmed_query, &user.id, 5).await {
Ok(results) => (results, trimmed_query.to_string()),
Err(e) => {
return Err(HtmlError::from(e));
}
const TOTAL_LIMIT: usize = 10;
let (text_results, entity_results) = try_join(
TextContent::search(&state.db, trimmed_query, &user.id, TOTAL_LIMIT),
KnowledgeEntity::search(&state.db, trimmed_query, &user.id, TOTAL_LIMIT),
)
.await?;
let mut combined_results: Vec<SearchResultForTemplate> =
Vec::with_capacity(text_results.len() + entity_results.len());
for text_result in text_results {
let score = text_result.score;
combined_results.push(SearchResultForTemplate {
result_type: "text_content".to_string(),
score,
text_content: Some(text_result),
knowledge_entity: None,
});
}
for entity_result in entity_results {
let score = entity_result.score;
combined_results.push(SearchResultForTemplate {
result_type: "knowledge_entity".to_string(),
score,
text_content: None,
knowledge_entity: Some(entity_result),
});
}
combined_results.sort_by(|a, b| b.score.total_cmp(&a.score));
combined_results.truncate(TOTAL_LIMIT);
(combined_results, trimmed_query.to_string())
}
} else {
(Vec::new(), String::new())
(Vec::<SearchResultForTemplate>::new(), String::new())
};
Ok(TemplateResponse::new_template(

View File

@@ -2,72 +2,120 @@
<ul class="nb-card p-0">
{% for result in search_result %}
<li class="p-4 u-hairline hover:bg-base-200/40 flex gap-3">
<div class="w-10 h-10 flex-shrink-0 self-start mt-1 grid place-items-center border-2 border-neutral bg-base-100 shadow-[4px_4px_0_0_#000]">
{% if result.url_info and result.url_info.url %}
<div class="tooltip tooltip-right" data-tip="Web Link">
{% include "icons/link_icon.html" %}
</div>
{% elif result.file_info and result.file_info.file_name %}
<div class="tooltip tooltip-right" data-tip="File Document">
{% include "icons/document_icon.html" %}
</div>
{% else %}
<div class="tooltip tooltip-right" data-tip="Text Content">
{% include "icons/bars_icon.html" %}
</div>
{% endif %}
</div>
<div class="flex-1 min-w-0">
<h3 class="text-lg font-extrabold mb-1 leading-snug">
<a hx-get="/content/{{ result.id }}/read" hx-target="#modal" hx-swap="innerHTML" class="nb-link">
{% set title_text = result.highlighted_url_title
| default(result.url_info.title if result.url_info else none, true)
| default(result.highlighted_file_name, true)
| default(result.file_info.file_name if result.file_info else none, true)
| default("Text snippet: " ~ (result.id | string)[-8:], true) %}
{{ title_text | safe }}
</a>
</h3>
<div class="markdown-content prose-tufte-compact text-base-content/80 mb-3 overflow-hidden line-clamp-6" data-content="{{result.highlighted_text | escape}}">
{% if result.highlighted_text %}
{{ result.highlighted_text | escape }}
{% elif result.text %}
{{ result.text | escape }}
{% if result.result_type == "text_content" %}
{% set tc = result.text_content %}
<div class="w-10 h-10 flex-shrink-0 self-start mt-1 grid place-items-center border-2 border-neutral bg-base-100 shadow-[4px_4px_0_0_#000]">
{% if tc.url_info and tc.url_info.url %}
<div class="tooltip tooltip-right" data-tip="Web Link">
{% include "icons/link_icon.html" %}
</div>
{% elif tc.file_info and tc.file_info.file_name %}
<div class="tooltip tooltip-right" data-tip="File Document">
{% include "icons/document_icon.html" %}
</div>
{% else %}
<span class="italic opacity-60">No text preview available.</span>
<div class="tooltip tooltip-right" data-tip="Text Content">
{% include "icons/bars_icon.html" %}
</div>
{% endif %}
</div>
<div class="text-xs flex flex-wrap gap-x-4 gap-y-2 items-center">
<span class="inline-flex items-center">
<span class="uppercase tracking-wide opacity-60 mr-2">Category</span>
<span class="nb-badge">{{ result.highlighted_category | default(result.category, true) | safe }}</span>
</span>
{% if result.highlighted_context or result.context %}
<span class="inline-flex items-center min-w-0">
<span class="uppercase tracking-wide opacity-60 mr-2">Context</span>
<span class="nb-badge">{{ result.highlighted_context | default(result.context, true) | safe }}</span>
</span>
{% endif %}
{% if result.url_info and result.url_info.url %}
<span class="inline-flex items-center min-w-0">
<span class="uppercase tracking-wide opacity-60 mr-2">Source</span>
<a href="{{ result.url_info.url }}" target="_blank" class="nb-link truncate" title="{{ result.url_info.url }}">
{{ result.highlighted_url | default(result.url_info.url ) | safe }}
<div class="flex-1 min-w-0">
<h3 class="text-lg font-extrabold mb-1 leading-snug">
<a hx-get="/content/{{ tc.id }}/read" hx-target="#modal" hx-swap="innerHTML" class="nb-link">
{% set title_text = tc.highlighted_url_title
| default(tc.url_info.title if tc.url_info else none, true)
| default(tc.highlighted_file_name, true)
| default(tc.file_info.file_name if tc.file_info else none, true)
| default("Text snippet: " ~ (tc.id | string)[-8:], true) %}
{{ title_text | safe }}
</a>
</span>
{% endif %}
</h3>
<span class="inline-flex items-center">
<span class="uppercase tracking-wide opacity-60 mr-2">Score</span>
<span class="nb-badge">{{ result.score }}</span>
</span>
<div class="markdown-content prose-tufte-compact text-base-content/80 mb-3 overflow-hidden line-clamp-6" data-content="{{tc.highlighted_text | escape}}">
{% if tc.highlighted_text %}
{{ tc.highlighted_text | escape }}
{% elif tc.text %}
{{ tc.text | escape }}
{% else %}
<span class="italic opacity-60">No text preview available.</span>
{% endif %}
</div>
<div class="text-xs flex flex-wrap gap-x-4 gap-y-2 items-center">
<span class="inline-flex items-center">
<span class="uppercase tracking-wide opacity-60 mr-2">Category</span>
<span class="nb-badge">{{ tc.highlighted_category | default(tc.category, true) | safe }}</span>
</span>
{% if tc.highlighted_context or tc.context %}
<span class="inline-flex items-center min-w-0">
<span class="uppercase tracking-wide opacity-60 mr-2">Context</span>
<span class="nb-badge">{{ tc.highlighted_context | default(tc.context, true) | safe }}</span>
</span>
{% endif %}
{% if tc.url_info and tc.url_info.url %}
<span class="inline-flex items-center min-w-0">
<span class="uppercase tracking-wide opacity-60 mr-2">Source</span>
<a href="{{ tc.url_info.url }}" target="_blank" class="nb-link truncate" title="{{ tc.url_info.url }}">
{{ tc.highlighted_url | default(tc.url_info.url ) | safe }}
</a>
</span>
{% endif %}
<span class="inline-flex items-center">
<span class="uppercase tracking-wide opacity-60 mr-2">Score</span>
<span class="nb-badge">{{ result.score }}</span>
</span>
</div>
</div>
</div>
{% elif result.result_type == "knowledge_entity" %}
{% set entity = result.knowledge_entity %}
<div class="w-10 h-10 flex-shrink-0 self-start mt-1 grid place-items-center border-2 border-neutral bg-base-100 shadow-[4px_4px_0_0_#000]">
<div class="tooltip tooltip-right" data-tip="Knowledge Entity">
{% include "icons/book_icon.html" %}
</div>
</div>
<div class="flex-1 min-w-0">
<h3 class="text-lg font-extrabold mb-1 leading-snug">
<a hx-get="/knowledge-entity/{{ entity.id }}" hx-target="#modal" hx-swap="innerHTML" class="nb-link">
{% set entity_title = entity.highlighted_name | default(entity.name, true) %}
{{ entity_title | safe }}
</a>
</h3>
<div class="prose prose-tufte-compact text-base-content/80 mb-3 overflow-hidden line-clamp-6">
{% if entity.highlighted_description %}
{{ entity.highlighted_description | safe }}
{% elif entity.description %}
{{ entity.description | escape }}
{% else %}
<span class="italic opacity-60">No description available.</span>
{% endif %}
</div>
<div class="text-xs flex flex-wrap gap-x-4 gap-y-2 items-center">
<span class="inline-flex items-center">
<span class="uppercase tracking-wide opacity-60 mr-2">Entity Type</span>
<span class="nb-badge">{{ entity.entity_type }}</span>
</span>
{% if entity.source_id %}
<span class="inline-flex items-center min-w-0">
<span class="uppercase tracking-wide opacity-60 mr-2">Source ID</span>
<span class="nb-badge truncate max-w-xs" title="{{ entity.source_id }}">{{ entity.source_id }}</span>
</span>
{% endif %}
<span class="inline-flex items-center">
<span class="uppercase tracking-wide opacity-60 mr-2">Score</span>
<span class="nb-badge">{{ result.score }}</span>
</span>
</div>
</div>
{% endif %}
</li>
{% endfor %}
</ul>