fix: arc-share retrieved chunks, centralize entity embeddings, and trim hot-path clones.

This commit is contained in:
Per Stark
2026-06-06 23:05:53 +02:00
parent 676fdbc132
commit 4559ee0aa8
41 changed files with 368 additions and 289 deletions
+1 -4
View File
@@ -8,10 +8,7 @@ use std::sync::Arc;
use anyhow::Context;
use async_openai::Client;
use common::{
storage::{
db::SurrealDbClient,
store::StorageManager,
},
storage::{db::SurrealDbClient, store::StorageManager},
utils::{
config::{get_config, AppConfig},
embedding::{align_fastembed_system_settings, EmbeddingProvider},
+15 -13
View File
@@ -67,7 +67,8 @@ pub async fn prepare_embedding_runtime(
let index_dim = if mismatch {
match role {
EmbeddingRuntimeRole::Maintainer => {
reconcile_embeddings(&services.db, &services.embedding_provider, target_dim).await?;
reconcile_embeddings(&services.db, &services.embedding_provider, target_dim)
.await?;
target_dim
}
EmbeddingRuntimeRole::ReadOnly => {
@@ -238,9 +239,7 @@ mod tests {
stored_dim: usize,
target_dim: usize,
) -> (super::SharedServices, std::path::PathBuf) {
let (mut services, data_dir) = init_smoke_services()
.await
.expect("smoke services");
let (mut services, data_dir) = init_smoke_services().await.expect("smoke services");
ensure_runtime(&services.db, stored_dim)
.await
@@ -254,9 +253,8 @@ mod tests {
.await
.expect("update settings");
services.embedding_provider = Arc::new(
EmbeddingProvider::new_hashed(target_dim).expect("hashed provider for test"),
);
services.embedding_provider =
Arc::new(EmbeddingProvider::new_hashed(target_dim).expect("hashed provider for test"));
(services, data_dir)
}
@@ -270,7 +268,9 @@ mod tests {
.expect("maintainer startup");
assert_eq!(
embedding_index_dimension(&services.db).await.expect("index dim"),
embedding_index_dimension(&services.db)
.await
.expect("index dim"),
Some(5),
"maintainer should rebuild the index to the provider dimension"
);
@@ -287,7 +287,9 @@ mod tests {
.expect("read-only startup");
assert_eq!(
embedding_index_dimension(&services.db).await.expect("index dim"),
embedding_index_dimension(&services.db)
.await
.expect("index dim"),
Some(3),
"read-only server must not overwrite the index before a maintainer re-embeds"
);
@@ -297,9 +299,7 @@ mod tests {
#[tokio::test]
async fn maintainer_reembeds_chunks_when_index_dimension_differs() {
let (mut services, data_dir) = init_smoke_services()
.await
.expect("smoke services");
let (mut services, data_dir) = init_smoke_services().await.expect("smoke services");
let mut settings = SystemSettings::get_current(&services.db)
.await
@@ -339,7 +339,9 @@ mod tests {
.expect("maintainer startup with data");
assert_eq!(
embedding_index_dimension(&services.db).await.expect("index dim"),
embedding_index_dimension(&services.db)
.await
.expect("index dim"),
Some(5)
);
+5 -1
View File
@@ -95,7 +95,11 @@ mod tests {
use common::storage::types::{system_settings::SystemSettings, user::User};
use tower::ServiceExt;
async fn build_test_app() -> (Router, Arc<common::storage::db::SurrealDbClient>, std::path::PathBuf) {
async fn build_test_app() -> (
Router,
Arc<common::storage::db::SurrealDbClient>,
std::path::PathBuf,
) {
let (services, data_dir) = init_smoke_services()
.await
.expect("failed to init services");
+2 -3
View File
@@ -68,9 +68,8 @@ mod tests {
let db = Arc::clone(&services.db);
let pipeline = Arc::new(pipeline);
let worker = tokio::spawn(async move {
ingestion_pipeline::run_worker_loop(db, pipeline).await
});
let worker =
tokio::spawn(async move { ingestion_pipeline::run_worker_loop(db, pipeline).await });
tokio::time::sleep(Duration::from_millis(250)).await;
assert!(