refactor: extract generic ensure_fts_index helper

This commit is contained in:
Per Stark
2026-06-18 21:55:18 +02:00
parent 530cd0a8f1
commit 01ef1bcb7a
4 changed files with 68 additions and 50 deletions
+19 -25
View File
@@ -496,7 +496,7 @@ mod tests {
use super::*;
use crate::storage::indexes::rebuild;
use crate::storage::types::knowledge_entity_embedding::KnowledgeEntityEmbedding;
use crate::test_utils::{prepare_knowledge_entity_test_db, setup_test_db};
use crate::test_utils::{ensure_fts_index, prepare_knowledge_entity_test_db, setup_test_db};
use anyhow::{self, Context};
#[test]
@@ -509,27 +509,6 @@ mod tests {
assert_eq!(text, "name: Alpha, description: Beta, type: TextSnippet");
}
/// Defines the `app_en_fts_analyzer` analyzer and the BM25 search indexes on
/// the `name` and `description` fields of `knowledge_entity`.
///
/// The first attempt defines the analyzer with the `snowball(english)` filter.
/// If that query call returns an error, the analyzer is overwritten without
/// the snowball filter and the index definitions are issued again.
///
/// # Errors
///
/// Returns an error only when the fallback query call fails; the error from
/// the first attempt is included in the context message.
async fn ensure_entity_fts_indexes(db: &SurrealDbClient) -> anyhow::Result<()> {
    // The raw strings are kept flush-left so their contents stay unchanged.
    let with_stemming = r#"
DEFINE ANALYZER IF NOT EXISTS app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii, snowball(english);
DEFINE INDEX IF NOT EXISTS knowledge_entity_fts_name_idx ON TABLE knowledge_entity FIELDS name SEARCH ANALYZER app_en_fts_analyzer BM25;
DEFINE INDEX IF NOT EXISTS knowledge_entity_fts_description_idx ON TABLE knowledge_entity FIELDS description SEARCH ANALYZER app_en_fts_analyzer BM25;
"#;
    let without_stemming = r#"
DEFINE ANALYZER OVERWRITE app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii;
DEFINE INDEX IF NOT EXISTS knowledge_entity_fts_name_idx ON TABLE knowledge_entity FIELDS name SEARCH ANALYZER app_en_fts_analyzer BM25;
DEFINE INDEX IF NOT EXISTS knowledge_entity_fts_description_idx ON TABLE knowledge_entity FIELDS description SEARCH ANALYZER app_en_fts_analyzer BM25;
"#;

    match db.client.query(with_stemming).await {
        Ok(_) => Ok(()),
        // Retry without stemming, keeping the first failure in the context.
        Err(err) => db
            .client
            .query(without_stemming)
            .await
            .map(drop)
            .with_context(|| format!("define entity fts index fallback: {err}")),
    }
}
use serde_json::json;
#[tokio::test]
@@ -962,7 +941,12 @@ mod tests {
#[tokio::test]
async fn test_fts_search_returns_empty_when_no_entities() -> anyhow::Result<()> {
let db = setup_test_db().await?;
ensure_entity_fts_indexes(&db).await?;
ensure_fts_index(
&db,
"knowledge_entity",
&[("name", "name"), ("description", "description")],
)
.await?;
rebuild(&db)
.await
.with_context(|| "rebuild indexes".to_string())?;
@@ -978,7 +962,12 @@ mod tests {
#[tokio::test]
async fn test_fts_search_single_result() -> anyhow::Result<()> {
let db = setup_test_db().await?;
ensure_entity_fts_indexes(&db).await?;
ensure_fts_index(
&db,
"knowledge_entity",
&[("name", "name"), ("description", "description")],
)
.await?;
let user_id = "fts_user";
let entity = KnowledgeEntity::new(
@@ -1010,7 +999,12 @@ mod tests {
#[tokio::test]
async fn test_fts_search_orders_by_score_and_filters_user() -> anyhow::Result<()> {
let db = setup_test_db().await?;
ensure_entity_fts_indexes(&db).await?;
ensure_fts_index(
&db,
"knowledge_entity",
&[("name", "name"), ("description", "description")],
)
.await?;
let user_id = "fts_user_order";
let high_score_entity = KnowledgeEntity::new(
+4 -24
View File
@@ -323,30 +323,10 @@ mod tests {
use crate::storage::indexes::{ensure_runtime, rebuild};
use crate::storage::types::text_chunk_embedding::TextChunkEmbedding;
use crate::test_utils::{
configure_embedding_dimension, prepare_text_chunk_test_db, setup_test_db,
configure_embedding_dimension, ensure_fts_index, prepare_text_chunk_test_db, setup_test_db,
};
use surrealdb::RecordId;
/// Defines the `app_en_fts_analyzer` analyzer and the BM25 search index on the
/// `chunk` field of `text_chunk`.
///
/// The first attempt defines the analyzer with the `snowball(english)` filter.
/// If that query call returns an error, the analyzer is overwritten without
/// the snowball filter and the index definition is issued again.
///
/// # Errors
///
/// Returns an error only when the fallback query call fails; the error from
/// the first attempt is included in the context message.
async fn ensure_chunk_fts_index(db: &SurrealDbClient) -> anyhow::Result<()> {
    // The raw strings are kept flush-left so their contents stay unchanged.
    let with_stemming = r#"
DEFINE ANALYZER IF NOT EXISTS app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii, snowball(english);
DEFINE INDEX IF NOT EXISTS text_chunk_fts_chunk_idx ON TABLE text_chunk FIELDS chunk SEARCH ANALYZER app_en_fts_analyzer BM25;
"#;
    let without_stemming = r#"
DEFINE ANALYZER OVERWRITE app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii;
DEFINE INDEX IF NOT EXISTS text_chunk_fts_chunk_idx ON TABLE text_chunk FIELDS chunk SEARCH ANALYZER app_en_fts_analyzer BM25;
"#;

    match db.client.query(with_stemming).await {
        Ok(_) => Ok(()),
        // Retry without stemming, keeping the first failure in the context.
        Err(err) => db
            .client
            .query(without_stemming)
            .await
            .map(drop)
            .with_context(|| format!("define chunk fts index fallback: {err}")),
    }
}
#[tokio::test]
async fn test_text_chunk_creation() -> anyhow::Result<()> {
let source_id = "source123".to_string();
@@ -659,7 +639,7 @@ mod tests {
#[tokio::test]
async fn test_fts_search_returns_empty_when_no_chunks() -> anyhow::Result<()> {
let db = setup_test_db().await?;
ensure_chunk_fts_index(&db).await?;
ensure_fts_index(&db, "text_chunk", &[("chunk", "chunk")]).await?;
rebuild(&db)
.await
.with_context(|| "rebuild indexes".to_string())?;
@@ -675,7 +655,7 @@ mod tests {
#[tokio::test]
async fn test_fts_search_single_result() -> anyhow::Result<()> {
let db = setup_test_db().await?;
ensure_chunk_fts_index(&db).await?;
ensure_fts_index(&db, "text_chunk", &[("chunk", "chunk")]).await?;
let user_id = "fts_user";
let chunk = TextChunk::new(
@@ -704,7 +684,7 @@ mod tests {
#[tokio::test]
async fn test_fts_search_orders_by_score_and_filters_user() -> anyhow::Result<()> {
let db = setup_test_db().await?;
ensure_chunk_fts_index(&db).await?;
ensure_fts_index(&db, "text_chunk", &[("chunk", "chunk")]).await?;
let user_id = "fts_user_order";
let high_score_chunk = TextChunk::new(
+44
View File
@@ -91,3 +91,47 @@ pub async fn setup_test_db_with_runtime_indexes() -> Result<SurrealDbClient> {
rebuild(&db).await?;
Ok(db)
}
/// Ensures the shared FTS analyzer and BM25 search indexes exist for a table.
///
/// First tries to define `app_en_fts_analyzer` with the `snowball(english)`
/// stemming filter. If that attempt fails, the analyzer is overwritten with a
/// plain `lowercase, ascii` filter chain and the indexes are defined again.
///
/// `indexes` is a slice of `(field_name, index_id_suffix)` pairs —
/// e.g. `&[("chunk", "chunk")]` on table `text_chunk` produces index
/// `text_chunk_fts_chunk_idx` on field `chunk`.
///
/// `table`, the field names and the suffixes are interpolated into the
/// statements verbatim, so callers must pass trusted identifiers only.
///
/// # Errors
///
/// Returns an error if the fallback definition fails, either because the
/// query could not be run or because one of its statements reported an
/// error. A failure of the initial snowball attempt is never returned on its
/// own: it only triggers the fallback and is included in the fallback's error
/// context.
pub async fn ensure_fts_index(
    db: &SurrealDbClient,
    table: &str,
    indexes: &[(&str, &str)],
) -> Result<()> {
    use std::fmt::Write;

    let mut define_indexes = String::new();
    for (field, suffix) in indexes {
        // Writing into a `String` cannot fail, so the `fmt::Result` is ignored.
        let _ = writeln!(
            define_indexes,
            "DEFINE INDEX IF NOT EXISTS {table}_fts_{suffix}_idx ON TABLE {table} FIELDS {field} SEARCH ANALYZER app_en_fts_analyzer BM25;"
        );
    }

    let snowball_sql = format!(
        "DEFINE ANALYZER IF NOT EXISTS app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii, snowball(english);\n{define_indexes}"
    );

    // A successful `query` call can still carry per-statement errors inside the
    // response; `check()` surfaces the first of them so a failed DEFINE is not
    // silently ignored.
    let snowball_attempt = db
        .client
        .query(&snowball_sql)
        .await
        .and_then(|response| response.check());

    if let Err(err) = snowball_attempt {
        let fallback_sql = format!(
            "DEFINE ANALYZER OVERWRITE app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii;\n{define_indexes}"
        );
        db.client
            .query(&fallback_sql)
            .await
            .and_then(|response| response.check())
            .with_context(|| format!("define fts index fallback for {table}: {err}"))?;
    }

    Ok(())
}