Tidy up code; add DTO for search

This commit is contained in:
Per Stark
2025-12-20 22:30:31 +01:00
parent 90bac299a3
commit 86270de873
23 changed files with 936 additions and 73 deletions
+16
View File
@@ -2,6 +2,19 @@ use config::{Config, ConfigError, Environment, File};
use serde::Deserialize;
use std::env;
/// Which engine is used to produce embedding vectors.
///
/// Deserialized from the all-lowercase variant name (`openai`, `fastembed`,
/// `hashed`). When no value is supplied, [`EmbeddingBackend::FastEmbed`] is
/// the default.
#[derive(Debug, Clone, Default, PartialEq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum EmbeddingBackend {
    /// Embeddings requested from an OpenAI-compatible API.
    OpenAI,
    /// Embeddings computed locally through FastEmbed; this is the default variant.
    #[default]
    FastEmbed,
    /// Deterministic hash-based embeddings, meant for testing.
    Hashed,
}
#[derive(Clone, Deserialize, Debug, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum StorageKind {
@@ -60,6 +73,8 @@ pub struct AppConfig {
pub fastembed_max_length: Option<usize>,
#[serde(default)]
pub retrieval_strategy: Option<String>,
#[serde(default)]
pub embedding_backend: EmbeddingBackend,
}
/// Default data directory for persisted assets.
@@ -127,6 +142,7 @@ impl Default for AppConfig {
fastembed_show_download_progress: None,
fastembed_max_length: None,
retrieval_strategy: None,
embedding_backend: EmbeddingBackend::default(),
}
}
}
+28
View File
@@ -235,6 +235,34 @@ impl EmbeddingProvider {
},
})
}
/// Builds the embedding provider selected by the application configuration.
///
/// The constructor is picked from `config.embedding_backend`:
/// - `OpenAI`: needs `openai_client` to be `Some`; uses the
///   `text-embedding-3-small` model with 1536 dimensions.
/// - `FastEmbed`: local model `nomic-ai/nomic-embed-text-v1.5`.
/// - `Hashed`: deterministic hashed embeddings with 384 dimensions (for testing).
///
/// # Errors
///
/// Fails when the `OpenAI` backend is selected but `openai_client` is `None`,
/// and otherwise forwards whatever the chosen constructor returns.
pub async fn from_config(
    config: &crate::utils::config::AppConfig,
    openai_client: Option<Arc<Client<async_openai::config::OpenAIConfig>>>,
) -> Result<Self> {
    use crate::utils::config::EmbeddingBackend as Backend;

    // NOTE(review): the OpenAI defaults are meant to mirror the initial
    // SystemSettings values — keep them in sync if those change.
    const OPENAI_MODEL: &str = "text-embedding-3-small";
    // Default model for the local FastEmbed backend.
    const FASTEMBED_MODEL: &str = "nomic-ai/nomic-embed-text-v1.5";

    match &config.embedding_backend {
        Backend::Hashed => Self::new_hashed(384),
        Backend::FastEmbed => Self::new_fastembed(Some(FASTEMBED_MODEL.to_owned())).await,
        Backend::OpenAI => {
            // A client is mandatory for this backend; bail out early without one.
            let client = openai_client
                .ok_or_else(|| anyhow!("OpenAI embedding backend requires an OpenAI client"))?;
            Self::new_openai(client, OPENAI_MODEL.to_owned(), 1536)
        }
    }
}
}
// Helper functions for hashed embeddings