mirror of
https://github.com/perstarkse/minne.git
synced 2026-06-24 10:56:29 +02:00
refactor: extract generic ensure_fts_index helper
This commit is contained in:
@@ -496,7 +496,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::storage::indexes::rebuild;
|
||||
use crate::storage::types::knowledge_entity_embedding::KnowledgeEntityEmbedding;
|
||||
use crate::test_utils::{prepare_knowledge_entity_test_db, setup_test_db};
|
||||
use crate::test_utils::{ensure_fts_index, prepare_knowledge_entity_test_db, setup_test_db};
|
||||
use anyhow::{self, Context};
|
||||
|
||||
#[test]
|
||||
@@ -509,27 +509,6 @@ mod tests {
|
||||
assert_eq!(text, "name: Alpha, description: Beta, type: TextSnippet");
|
||||
}
|
||||
|
||||
/// Defines the shared FTS analyzer and the BM25 indexes on the `name` and
/// `description` fields of `knowledge_entity`.
///
/// The analyzer is first defined with the `snowball(english)` filter. If that
/// attempt fails, the analyzer is overwritten without the snowball filter and
/// the index definitions are retried.
///
/// # Errors
///
/// Returns an error if the fallback query, or any statement inside it, fails.
/// The error message carries the failure from the first attempt.
async fn ensure_entity_fts_indexes(db: &SurrealDbClient) -> anyhow::Result<()> {
    let snowball_sql = r#"
        DEFINE ANALYZER IF NOT EXISTS app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii, snowball(english);
        DEFINE INDEX IF NOT EXISTS knowledge_entity_fts_name_idx ON TABLE knowledge_entity FIELDS name SEARCH ANALYZER app_en_fts_analyzer BM25;
        DEFINE INDEX IF NOT EXISTS knowledge_entity_fts_description_idx ON TABLE knowledge_entity FIELDS description SEARCH ANALYZER app_en_fts_analyzer BM25;
    "#;

    // Awaiting the query alone is assumed (per the SurrealDB SDK docs) to
    // surface only request-level failures; `check()` additionally returns the
    // first error raised by an individual statement, so a rejected analyzer or
    // index definition actually triggers the fallback.
    let first_attempt = db
        .client
        .query(snowball_sql)
        .await
        .and_then(|response| response.check());

    if let Err(err) = first_attempt {
        let fallback_sql = r#"
            DEFINE ANALYZER OVERWRITE app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii;
            DEFINE INDEX IF NOT EXISTS knowledge_entity_fts_name_idx ON TABLE knowledge_entity FIELDS name SEARCH ANALYZER app_en_fts_analyzer BM25;
            DEFINE INDEX IF NOT EXISTS knowledge_entity_fts_description_idx ON TABLE knowledge_entity FIELDS description SEARCH ANALYZER app_en_fts_analyzer BM25;
        "#;

        db.client
            .query(fallback_sql)
            .await
            .and_then(|response| response.check())
            .with_context(|| format!("define entity fts index fallback: {err}"))?;
    }

    Ok(())
}
|
||||
use serde_json::json;
|
||||
|
||||
#[tokio::test]
|
||||
@@ -962,7 +941,12 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_fts_search_returns_empty_when_no_entities() -> anyhow::Result<()> {
|
||||
let db = setup_test_db().await?;
|
||||
ensure_entity_fts_indexes(&db).await?;
|
||||
ensure_fts_index(
|
||||
&db,
|
||||
"knowledge_entity",
|
||||
&[("name", "name"), ("description", "description")],
|
||||
)
|
||||
.await?;
|
||||
rebuild(&db)
|
||||
.await
|
||||
.with_context(|| "rebuild indexes".to_string())?;
|
||||
@@ -978,7 +962,12 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_fts_search_single_result() -> anyhow::Result<()> {
|
||||
let db = setup_test_db().await?;
|
||||
ensure_entity_fts_indexes(&db).await?;
|
||||
ensure_fts_index(
|
||||
&db,
|
||||
"knowledge_entity",
|
||||
&[("name", "name"), ("description", "description")],
|
||||
)
|
||||
.await?;
|
||||
|
||||
let user_id = "fts_user";
|
||||
let entity = KnowledgeEntity::new(
|
||||
@@ -1010,7 +999,12 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_fts_search_orders_by_score_and_filters_user() -> anyhow::Result<()> {
|
||||
let db = setup_test_db().await?;
|
||||
ensure_entity_fts_indexes(&db).await?;
|
||||
ensure_fts_index(
|
||||
&db,
|
||||
"knowledge_entity",
|
||||
&[("name", "name"), ("description", "description")],
|
||||
)
|
||||
.await?;
|
||||
|
||||
let user_id = "fts_user_order";
|
||||
let high_score_entity = KnowledgeEntity::new(
|
||||
|
||||
@@ -323,30 +323,10 @@ mod tests {
|
||||
use crate::storage::indexes::{ensure_runtime, rebuild};
|
||||
use crate::storage::types::text_chunk_embedding::TextChunkEmbedding;
|
||||
use crate::test_utils::{
|
||||
configure_embedding_dimension, prepare_text_chunk_test_db, setup_test_db,
|
||||
configure_embedding_dimension, ensure_fts_index, prepare_text_chunk_test_db, setup_test_db,
|
||||
};
|
||||
use surrealdb::RecordId;
|
||||
|
||||
/// Defines the shared FTS analyzer and the BM25 index on the `chunk` field of
/// `text_chunk`.
///
/// The analyzer is first defined with the `snowball(english)` filter. If that
/// attempt fails, the analyzer is overwritten without the snowball filter and
/// the index definition is retried.
///
/// # Errors
///
/// Returns an error if the fallback query, or any statement inside it, fails.
/// The error message carries the failure from the first attempt.
async fn ensure_chunk_fts_index(db: &SurrealDbClient) -> anyhow::Result<()> {
    let snowball_sql = r#"
        DEFINE ANALYZER IF NOT EXISTS app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii, snowball(english);
        DEFINE INDEX IF NOT EXISTS text_chunk_fts_chunk_idx ON TABLE text_chunk FIELDS chunk SEARCH ANALYZER app_en_fts_analyzer BM25;
    "#;

    // Awaiting the query alone is assumed (per the SurrealDB SDK docs) to
    // surface only request-level failures; `check()` additionally returns the
    // first error raised by an individual statement, so a rejected analyzer or
    // index definition actually triggers the fallback.
    let first_attempt = db
        .client
        .query(snowball_sql)
        .await
        .and_then(|response| response.check());

    if let Err(err) = first_attempt {
        let fallback_sql = r#"
            DEFINE ANALYZER OVERWRITE app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii;
            DEFINE INDEX IF NOT EXISTS text_chunk_fts_chunk_idx ON TABLE text_chunk FIELDS chunk SEARCH ANALYZER app_en_fts_analyzer BM25;
        "#;

        db.client
            .query(fallback_sql)
            .await
            .and_then(|response| response.check())
            .with_context(|| format!("define chunk fts index fallback: {err}"))?;
    }

    Ok(())
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_text_chunk_creation() -> anyhow::Result<()> {
|
||||
let source_id = "source123".to_string();
|
||||
@@ -659,7 +639,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_fts_search_returns_empty_when_no_chunks() -> anyhow::Result<()> {
|
||||
let db = setup_test_db().await?;
|
||||
ensure_chunk_fts_index(&db).await?;
|
||||
ensure_fts_index(&db, "text_chunk", &[("chunk", "chunk")]).await?;
|
||||
rebuild(&db)
|
||||
.await
|
||||
.with_context(|| "rebuild indexes".to_string())?;
|
||||
@@ -675,7 +655,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_fts_search_single_result() -> anyhow::Result<()> {
|
||||
let db = setup_test_db().await?;
|
||||
ensure_chunk_fts_index(&db).await?;
|
||||
ensure_fts_index(&db, "text_chunk", &[("chunk", "chunk")]).await?;
|
||||
|
||||
let user_id = "fts_user";
|
||||
let chunk = TextChunk::new(
|
||||
@@ -704,7 +684,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_fts_search_orders_by_score_and_filters_user() -> anyhow::Result<()> {
|
||||
let db = setup_test_db().await?;
|
||||
ensure_chunk_fts_index(&db).await?;
|
||||
ensure_fts_index(&db, "text_chunk", &[("chunk", "chunk")]).await?;
|
||||
|
||||
let user_id = "fts_user_order";
|
||||
let high_score_chunk = TextChunk::new(
|
||||
|
||||
@@ -91,3 +91,47 @@ pub async fn setup_test_db_with_runtime_indexes() -> Result<SurrealDbClient> {
|
||||
rebuild(&db).await?;
|
||||
Ok(db)
|
||||
}
|
||||
|
||||
/// Ensures an FTS analyzer and BM25 indexes exist for a table.
|
||||
///
|
||||
/// Attempts snowball(english) tokenizer first; falls back to basic
|
||||
/// lowercase+ascii when the platform lacks the snowball extension.
|
||||
///
|
||||
/// `indexes` is a slice of `(field_name, index_id_suffix)` pairs —
|
||||
/// e.g. `&[("chunk", "chunk")]` produces index
|
||||
/// `text_chunk_fts_chunk_idx` on column `chunk` of `text_chunk`.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error if the fallback definition fails. The initial
|
||||
/// snowball attempt is allowed to fail silently.
|
||||
pub async fn ensure_fts_index(
|
||||
db: &SurrealDbClient,
|
||||
table: &str,
|
||||
indexes: &[(&str, &str)],
|
||||
) -> Result<()> {
|
||||
use std::fmt::Write;
|
||||
|
||||
let mut define_indexes = String::new();
|
||||
for (field, suffix) in indexes {
|
||||
let _ = writeln!(
|
||||
define_indexes,
|
||||
"DEFINE INDEX IF NOT EXISTS {table}_fts_{suffix}_idx ON TABLE {table} FIELDS {field} SEARCH ANALYZER app_en_fts_analyzer BM25;"
|
||||
);
|
||||
}
|
||||
|
||||
let snowball_sql = format!(
|
||||
"DEFINE ANALYZER IF NOT EXISTS app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii, snowball(english);\n{define_indexes}"
|
||||
);
|
||||
|
||||
if let Err(err) = db.client.query(&snowball_sql).await {
|
||||
let fallback_sql = format!(
|
||||
"DEFINE ANALYZER OVERWRITE app_en_fts_analyzer TOKENIZERS class, punct FILTERS lowercase, ascii;\n{define_indexes}"
|
||||
);
|
||||
db.client
|
||||
.query(&fallback_sql)
|
||||
.await
|
||||
.with_context(|| format!("define fts index fallback for {table}: {err}"))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user