mirror of
https://github.com/perstarkse/minne.git
synced 2026-06-30 10:01:40 +02:00
tidying stuff up, dto for search
This commit is contained in:
@@ -2,6 +2,19 @@ use config::{Config, ConfigError, Environment, File};
|
||||
use serde::Deserialize;
|
||||
use std::env;
|
||||
|
||||
/// Selects the embedding backend for vector generation.
|
||||
#[derive(Clone, Deserialize, Debug, Default, PartialEq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum EmbeddingBackend {
|
||||
/// Use OpenAI-compatible API for embeddings.
|
||||
OpenAI,
|
||||
/// Use FastEmbed local embeddings (default).
|
||||
#[default]
|
||||
FastEmbed,
|
||||
/// Use deterministic hashed embeddings (for testing).
|
||||
Hashed,
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, PartialEq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum StorageKind {
|
||||
@@ -60,6 +73,8 @@ pub struct AppConfig {
|
||||
pub fastembed_max_length: Option<usize>,
|
||||
#[serde(default)]
|
||||
pub retrieval_strategy: Option<String>,
|
||||
#[serde(default)]
|
||||
pub embedding_backend: EmbeddingBackend,
|
||||
}
|
||||
|
||||
/// Default data directory for persisted assets.
|
||||
@@ -127,6 +142,7 @@ impl Default for AppConfig {
|
||||
fastembed_show_download_progress: None,
|
||||
fastembed_max_length: None,
|
||||
retrieval_strategy: None,
|
||||
embedding_backend: EmbeddingBackend::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -235,6 +235,34 @@ impl EmbeddingProvider {
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates an embedding provider based on application configuration.
|
||||
///
|
||||
/// Dispatches to the appropriate constructor based on `config.embedding_backend`:
|
||||
/// - `OpenAI`: Requires a valid OpenAI client
|
||||
/// - `FastEmbed`: Uses local embedding model
|
||||
/// - `Hashed`: Uses deterministic hashed embeddings (for testing)
|
||||
pub async fn from_config(
|
||||
config: &crate::utils::config::AppConfig,
|
||||
openai_client: Option<Arc<Client<async_openai::config::OpenAIConfig>>>,
|
||||
) -> Result<Self> {
|
||||
use crate::utils::config::EmbeddingBackend;
|
||||
|
||||
match config.embedding_backend {
|
||||
EmbeddingBackend::OpenAI => {
|
||||
let client = openai_client.ok_or_else(|| {
|
||||
anyhow!("OpenAI embedding backend requires an OpenAI client")
|
||||
})?;
|
||||
// Use defaults that match SystemSettings initial values
|
||||
Self::new_openai(client, "text-embedding-3-small".to_string(), 1536)
|
||||
}
|
||||
EmbeddingBackend::FastEmbed => {
|
||||
// Use nomic-embed-text-v1.5 as the default FastEmbed model
|
||||
Self::new_fastembed(Some("nomic-ai/nomic-embed-text-v1.5".to_string())).await
|
||||
}
|
||||
EmbeddingBackend::Hashed => Self::new_hashed(384),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions for hashed embeddings
|
||||
|
||||
Reference in New Issue
Block a user