mirror of
https://github.com/perstarkse/minne.git
synced 2026-05-28 10:29:30 +02:00
clippy: adhere to pedantic clippy, uniform test error handling
This commit is contained in:
+97
-118
@@ -3,7 +3,7 @@ use axum::{extract::FromRef, Router};
|
||||
use common::{
|
||||
storage::{
|
||||
db::SurrealDbClient,
|
||||
indexes::ensure_runtime_indexes,
|
||||
indexes::ensure_runtime,
|
||||
store::StorageManager,
|
||||
types::{
|
||||
knowledge_entity::KnowledgeEntity, system_settings::SystemSettings,
|
||||
@@ -12,7 +12,10 @@ use common::{
|
||||
},
|
||||
utils::{config::get_config, embedding::EmbeddingProvider},
|
||||
};
|
||||
use html_router::{html_routes, html_state::HtmlState};
|
||||
use html_router::{
|
||||
html_routes,
|
||||
html_state::{HtmlState, StateResources},
|
||||
};
|
||||
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
|
||||
use retrieval_pipeline::reranking::RerankerPool;
|
||||
use std::sync::Arc;
|
||||
@@ -21,19 +24,77 @@ use tracing_subscriber::{fmt, prelude::*, EnvFilter};
|
||||
|
||||
use tokio::task::LocalSet;
|
||||
|
||||
fn spawn_server_thread(
|
||||
listener: tokio::net::TcpListener,
|
||||
app: Router,
|
||||
) -> std::thread::JoinHandle<()> {
|
||||
std::thread::spawn(move || {
|
||||
let rt = match tokio::runtime::Runtime::new() {
|
||||
Ok(rt) => rt,
|
||||
Err(e) => {
|
||||
error!("Failed to create server runtime: {e}");
|
||||
return;
|
||||
}
|
||||
};
|
||||
rt.block_on(async {
|
||||
if let Err(e) = axum::serve(listener, app).await {
|
||||
error!("Server error: {}", e);
|
||||
}
|
||||
});
|
||||
})
|
||||
}
|
||||
|
||||
async fn run_worker(
|
||||
config: common::utils::config::AppConfig,
|
||||
reranker_pool: Option<Arc<RerankerPool>>,
|
||||
storage: StorageManager,
|
||||
) -> anyhow::Result<()> {
|
||||
let worker_db = Arc::new(
|
||||
SurrealDbClient::new(
|
||||
&config.surrealdb_address,
|
||||
&config.surrealdb_username,
|
||||
&config.surrealdb_password,
|
||||
&config.surrealdb_namespace,
|
||||
&config.surrealdb_database,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
|
||||
let openai_client = Arc::new(async_openai::Client::with_config(
|
||||
async_openai::config::OpenAIConfig::new()
|
||||
.with_api_key(&config.openai_api_key)
|
||||
.with_api_base(&config.openai_base_url),
|
||||
));
|
||||
|
||||
let embedding_provider = Arc::new(
|
||||
EmbeddingProvider::from_config(&config, Some(Arc::clone(&openai_client))).await?,
|
||||
);
|
||||
|
||||
let ingestion_pipeline = Arc::new(
|
||||
IngestionPipeline::new(
|
||||
Arc::clone(&worker_db),
|
||||
openai_client,
|
||||
config,
|
||||
reranker_pool,
|
||||
storage,
|
||||
embedding_provider,
|
||||
)?,
|
||||
);
|
||||
|
||||
info!("Starting worker process");
|
||||
run_worker_loop(worker_db, ingestion_pipeline).await
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
// Set up tracing
|
||||
tracing_subscriber::registry()
|
||||
.with(fmt::layer().with_writer(std::io::stderr))
|
||||
.with(EnvFilter::from_default_env())
|
||||
.try_init()
|
||||
.ok();
|
||||
|
||||
// Get config
|
||||
let config = get_config()?;
|
||||
|
||||
// Set up router states
|
||||
let db = Arc::new(
|
||||
SurrealDbClient::new(
|
||||
&config.surrealdb_address,
|
||||
@@ -45,7 +106,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
.await?,
|
||||
);
|
||||
|
||||
// Ensure db is initialized
|
||||
db.apply_migrations().await?;
|
||||
|
||||
let session_store = Arc::new(db.create_session_store().await?);
|
||||
@@ -55,27 +115,23 @@ async fn main() -> anyhow::Result<()> {
|
||||
.with_api_base(&config.openai_base_url),
|
||||
));
|
||||
|
||||
// Create embedding provider based on config before syncing settings.
|
||||
let embedding_provider =
|
||||
Arc::new(EmbeddingProvider::from_config(&config, Some(openai_client.clone())).await?);
|
||||
Arc::new(EmbeddingProvider::from_config(&config, Some(Arc::clone(&openai_client))).await?);
|
||||
info!(
|
||||
embedding_backend = ?config.embedding_backend,
|
||||
embedding_dimension = embedding_provider.dimension(),
|
||||
"Embedding provider initialized"
|
||||
);
|
||||
|
||||
// Sync SystemSettings with provider's dimensions/model/backend
|
||||
let (settings, dimensions_changed) =
|
||||
SystemSettings::sync_from_embedding_provider(&db, &embedding_provider).await?;
|
||||
|
||||
// If dimensions changed, re-embed existing data to keep queries working.
|
||||
if dimensions_changed {
|
||||
warn!(
|
||||
new_dimensions = settings.embedding_dimensions,
|
||||
"Embedding configuration changed; re-embedding existing data"
|
||||
);
|
||||
|
||||
// Re-embed text chunks
|
||||
info!("Re-embedding TextChunks");
|
||||
if let Err(e) =
|
||||
TextChunk::update_all_embeddings_with_provider(&db, &embedding_provider).await
|
||||
@@ -86,7 +142,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
);
|
||||
}
|
||||
|
||||
// Re-embed knowledge entities
|
||||
info!("Re-embedding KnowledgeEntities");
|
||||
if let Err(e) =
|
||||
KnowledgeEntity::update_all_embeddings_with_provider(&db, &embedding_provider).await
|
||||
@@ -100,29 +155,25 @@ async fn main() -> anyhow::Result<()> {
|
||||
info!("Re-embedding complete.");
|
||||
}
|
||||
|
||||
// Now ensure runtime indexes with the correct (synced) dimensions
|
||||
ensure_runtime_indexes(&db, settings.embedding_dimensions as usize).await?;
|
||||
ensure_runtime(&db, settings.embedding_dimensions as usize).await?;
|
||||
|
||||
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
|
||||
|
||||
// Create global storage manager
|
||||
let storage = StorageManager::new(&config).await?;
|
||||
|
||||
let html_state = HtmlState::new_with_resources(
|
||||
let html_state = HtmlState::new_with_resources(StateResources {
|
||||
db,
|
||||
openai_client,
|
||||
session_store,
|
||||
storage.clone(),
|
||||
config.clone(),
|
||||
reranker_pool.clone(),
|
||||
embedding_provider.clone(),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
storage: storage.clone(),
|
||||
config: config.clone(),
|
||||
reranker_pool: reranker_pool.clone(),
|
||||
embedding_provider: Arc::clone(&embedding_provider),
|
||||
template_engine: None,
|
||||
});
|
||||
|
||||
let api_state = ApiState::new(&config, storage.clone()).await?;
|
||||
|
||||
// Create Axum router
|
||||
let app = Router::new()
|
||||
.nest("/api/v1", api_routes_v1(&api_state))
|
||||
.merge(html_routes(&html_state))
|
||||
@@ -135,72 +186,16 @@ async fn main() -> anyhow::Result<()> {
|
||||
let serve_address = format!("0.0.0.0:{}", config.http_port);
|
||||
let listener = tokio::net::TcpListener::bind(serve_address).await?;
|
||||
|
||||
// Start the server in a separate OS thread with its own runtime
|
||||
let server_handle = std::thread::spawn(move || {
|
||||
let rt = tokio::runtime::Runtime::new().unwrap();
|
||||
rt.block_on(async {
|
||||
if let Err(e) = axum::serve(listener, app).await {
|
||||
error!("Server error: {}", e);
|
||||
}
|
||||
});
|
||||
});
|
||||
let server_handle = spawn_server_thread(listener, app);
|
||||
|
||||
// Create a LocalSet for the worker
|
||||
let local = LocalSet::new();
|
||||
|
||||
// Use a clone of the config for the worker
|
||||
let worker_config = config.clone();
|
||||
|
||||
// Run the worker in the local set
|
||||
local.spawn_local(async move {
|
||||
// Create worker db connection
|
||||
let worker_db = Arc::new(
|
||||
SurrealDbClient::new(
|
||||
&worker_config.surrealdb_address,
|
||||
&worker_config.surrealdb_username,
|
||||
&worker_config.surrealdb_password,
|
||||
&worker_config.surrealdb_namespace,
|
||||
&worker_config.surrealdb_database,
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
// Initialize worker components
|
||||
let openai_client = Arc::new(async_openai::Client::with_config(
|
||||
async_openai::config::OpenAIConfig::new()
|
||||
.with_api_key(&config.openai_api_key)
|
||||
.with_api_base(&config.openai_base_url),
|
||||
));
|
||||
|
||||
// Create embedding provider based on config
|
||||
let embedding_provider = Arc::new(
|
||||
EmbeddingProvider::from_config(&config, Some(openai_client.clone()))
|
||||
.await
|
||||
.expect("failed to create embedding provider"),
|
||||
);
|
||||
let ingestion_pipeline = Arc::new(
|
||||
IngestionPipeline::new(
|
||||
worker_db.clone(),
|
||||
openai_client.clone(),
|
||||
config.clone(),
|
||||
reranker_pool.clone(),
|
||||
storage.clone(),
|
||||
embedding_provider,
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
info!("Starting worker process");
|
||||
if let Err(e) = run_worker_loop(worker_db, ingestion_pipeline).await {
|
||||
error!("Worker process error: {}", e);
|
||||
if let Err(e) = run_worker(config, reranker_pool, storage).await {
|
||||
error!("Worker error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
// Run the local set on the main thread
|
||||
local.await;
|
||||
|
||||
// Wait for the server thread to finish (this likely won't be reached)
|
||||
if let Err(e) = server_handle.join() {
|
||||
error!("Server thread panicked: {:?}", e);
|
||||
}
|
||||
@@ -253,52 +248,39 @@ mod tests {
|
||||
let namespace = "test_ns";
|
||||
let database = format!("test_db_{}", Uuid::new_v4());
|
||||
let data_dir = std::env::temp_dir().join(format!("minne_smoke_{}", Uuid::new_v4()));
|
||||
|
||||
tokio::fs::create_dir_all(&data_dir)
|
||||
.await
|
||||
tokio::fs::create_dir_all(&data_dir).await
|
||||
.expect("failed to create temp data directory");
|
||||
|
||||
let config = smoke_test_config(namespace, &database, &data_dir);
|
||||
let db = Arc::new(
|
||||
SurrealDbClient::memory(namespace, &database)
|
||||
.await
|
||||
.expect("failed to start in-memory surrealdb"),
|
||||
);
|
||||
db.apply_migrations()
|
||||
.await
|
||||
.expect("failed to apply migrations");
|
||||
let db = Arc::new(SurrealDbClient::memory(namespace, &database).await?);
|
||||
db.apply_migrations().await?;
|
||||
|
||||
let session_store = Arc::new(db.create_session_store().await.expect("session store"));
|
||||
let session_store = Arc::new(db.create_session_store().await?);
|
||||
let openai_client = Arc::new(async_openai::Client::with_config(
|
||||
async_openai::config::OpenAIConfig::new()
|
||||
.with_api_key(&config.openai_api_key)
|
||||
.with_api_base(&config.openai_base_url),
|
||||
));
|
||||
|
||||
let storage = StorageManager::new(&config)
|
||||
.await
|
||||
.expect("failed to build storage manager");
|
||||
let storage = StorageManager::new(&config).await?;
|
||||
|
||||
// Use hashed embeddings for tests to avoid external dependencies
|
||||
let embedding_provider = Arc::new(
|
||||
common::utils::embedding::EmbeddingProvider::new_hashed(384)
|
||||
.expect("failed to create hashed embedding provider"),
|
||||
common::utils::embedding::EmbeddingProvider::new_hashed(384)?,
|
||||
);
|
||||
|
||||
let html_state = HtmlState::new_with_resources(
|
||||
db.clone(),
|
||||
let html_state = HtmlState::new_with_resources(StateResources {
|
||||
db: Arc::clone(&db),
|
||||
openai_client,
|
||||
session_store,
|
||||
storage.clone(),
|
||||
config.clone(),
|
||||
None,
|
||||
storage: storage.clone(),
|
||||
config: config.clone(),
|
||||
reranker_pool: None,
|
||||
embedding_provider,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
template_engine: None,
|
||||
});
|
||||
|
||||
let api_state = ApiState {
|
||||
db: db.clone(),
|
||||
db: Arc::clone(&db),
|
||||
config: config.clone(),
|
||||
storage,
|
||||
};
|
||||
@@ -376,25 +358,22 @@ mod tests {
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/api/v1/live")
|
||||
.body(Body::empty())
|
||||
.expect("request"),
|
||||
.body(Body::empty())?,
|
||||
)
|
||||
.await
|
||||
.expect("router response");
|
||||
.await?;
|
||||
assert_eq!(response.status(), StatusCode::OK);
|
||||
|
||||
let ready_response = app
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/api/v1/ready")
|
||||
.body(Body::empty())
|
||||
.expect("request"),
|
||||
.body(Body::empty())?,
|
||||
)
|
||||
.await
|
||||
.expect("ready response");
|
||||
.await?;
|
||||
assert_eq!(ready_response.status(), StatusCode::OK);
|
||||
|
||||
tokio::fs::remove_dir_all(&data_dir).await.ok();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
|
||||
+10
-8
@@ -6,7 +6,10 @@ use common::{
|
||||
storage::{db::SurrealDbClient, store::StorageManager, types::system_settings::SystemSettings},
|
||||
utils::{config::get_config, embedding::EmbeddingProvider},
|
||||
};
|
||||
use html_router::{html_routes, html_state::HtmlState};
|
||||
use html_router::{
|
||||
html_routes,
|
||||
html_state::{HtmlState, StateResources},
|
||||
};
|
||||
use retrieval_pipeline::reranking::RerankerPool;
|
||||
use tracing::info;
|
||||
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
|
||||
@@ -52,7 +55,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
// Create embedding provider based on config
|
||||
let embedding_provider =
|
||||
Arc::new(EmbeddingProvider::from_config(&config, Some(openai_client.clone())).await?);
|
||||
Arc::new(EmbeddingProvider::from_config(&config, Some(Arc::clone(&openai_client))).await?);
|
||||
info!(
|
||||
embedding_backend = ?config.embedding_backend,
|
||||
embedding_dimension = embedding_provider.dimension(),
|
||||
@@ -63,17 +66,16 @@ async fn main() -> anyhow::Result<()> {
|
||||
let (_settings, _dimensions_changed) =
|
||||
SystemSettings::sync_from_embedding_provider(&db, &embedding_provider).await?;
|
||||
|
||||
let html_state = HtmlState::new_with_resources(
|
||||
let html_state = HtmlState::new_with_resources(StateResources {
|
||||
db,
|
||||
openai_client,
|
||||
session_store,
|
||||
storage.clone(),
|
||||
config.clone(),
|
||||
storage: storage.clone(),
|
||||
config: config.clone(),
|
||||
reranker_pool,
|
||||
embedding_provider,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
template_engine: None,
|
||||
});
|
||||
|
||||
let api_state = ApiState::new(&config, storage).await?;
|
||||
|
||||
|
||||
+3
-3
@@ -42,7 +42,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
// Create embedding provider based on config
|
||||
let embedding_provider =
|
||||
Arc::new(EmbeddingProvider::from_config(&config, Some(openai_client.clone())).await?);
|
||||
Arc::new(EmbeddingProvider::from_config(&config, Some(Arc::clone(&openai_client))).await?);
|
||||
info!(
|
||||
embedding_backend = ?config.embedding_backend,
|
||||
"Embedding provider initialized for worker"
|
||||
@@ -52,8 +52,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
let storage = StorageManager::new(&config).await?;
|
||||
|
||||
let ingestion_pipeline = Arc::new(IngestionPipeline::new(
|
||||
db.clone(),
|
||||
openai_client.clone(),
|
||||
Arc::clone(&db),
|
||||
Arc::clone(&openai_client),
|
||||
config,
|
||||
reranker_pool,
|
||||
storage,
|
||||
|
||||
Reference in New Issue
Block a user