feat: pool fastembed, batch embeddings, and reconcile embedding config on startup

2026-06-22 06:29:33 +02:00 · 2026-06-03 22:10:33 +02:00
parent 7b850769c9
commit 15c9f18f6e
24 changed files with 565 additions and 546 deletions
@@ -1,7 +1,7 @@
 mod startup;
 pub mod wiring;

-pub use startup::prepare_embedding_runtime;
+pub use startup::{prepare_embedding_runtime, EmbeddingRuntimeRole};

 use std::sync::Arc;

@@ -2,7 +2,7 @@ use anyhow::Context;
 use common::{
    storage::{
        db::SurrealDbClient,
-        indexes::ensure_runtime,
+        indexes::{embedding_index_dimension, ensure_runtime},
        types::{
            knowledge_entity::KnowledgeEntity, system_settings::SystemSettings,
            text_chunk::TextChunk,
@@ -10,37 +10,129 @@ use common::{
    },
    utils::embedding::EmbeddingProvider,
 };
+use std::time::{SystemTime, UNIX_EPOCH};
 use tracing::{info, warn};

 use super::SharedServices;

-/// Syncs embedding settings, re-embeds stored vectors when dimensions change, and
-/// ensures runtime indexes match the active embedding dimension.
-pub async fn prepare_embedding_runtime(services: &SharedServices) -> anyhow::Result<SystemSettings> {
-    let (settings, dimensions_changed) =
+/// How a process participates in embedding-runtime maintenance.
+///
+/// Embedding configuration changes (model/dimension) take effect on restart: the active
+/// [`EmbeddingProvider`] is built once at startup, so the stored vectors must be reconciled to it
+/// before indexes are rebuilt. Only a single maintainer should perform that (potentially long,
+/// destructive) re-embed; query-only servers stay read-only to avoid racing it.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+// Each binary (main/worker/server) constructs only one variant, so the other looks dead within
+// that single compilation unit even though both are used across the binary set.
+#[allow(dead_code)]
+pub enum EmbeddingRuntimeRole {
+    /// Combined binary or worker: re-embeds stored data when it no longer matches the provider.
+    ///
+    /// The re-embed is guarded by a database lock; a maintainer that cannot take the lock skips
+    /// the re-embed on this instance.
+    Maintainer,
+    /// Server-only: never mutates stored embeddings; aligns indexes to the data that exists.
+    ///
+    /// On a dimension mismatch it logs a warning and keeps the indexes at the stored dimension.
+    ReadOnly,
+}
+
+/// Re-embed lock TTL, written as a SurrealQL duration literal because it is interpolated verbatim
+/// into the `CREATE` statement that takes the lock.
+///
+/// Sized generously for a slow re-embed of a large corpus, while still letting an abandoned lock
+/// (crashed maintainer) self-heal: the next acquirer reaps it once it has expired. Nothing in this
+/// module renews the lock while a re-embed is running, so a re-embed that outlasts the TTL can
+/// have its lock reaped and re-taken by a peer.
+const REEMBED_LOCK_TTL: &str = "30m";
+
+/// Reconciles embeddings with the active provider and ensures runtime indexes are ready.
+///
+/// Detection is based on the stored chunk-embedding HNSW index dimension (a persisted marker of
+/// the embedding space actually in the database). When it differs from the active provider's
+/// dimension, a [`EmbeddingRuntimeRole::Maintainer`] re-embeds before indexes are (re)built;
+/// a [`EmbeddingRuntimeRole::ReadOnly`] server leaves indexes aligned to the existing data and
+/// serves in a degraded state until a maintainer reconciles.
+///
+/// When no stored index dimension is reported (`None`), nothing is treated as a mismatch and the
+/// indexes are ensured at the provider's dimension.
+///
+/// # Errors
+///
+/// Returns an error if syncing settings, inspecting/building indexes, or re-embedding fails.
+pub async fn prepare_embedding_runtime(
+    services: &SharedServices,
+    role: EmbeddingRuntimeRole,
+) -> anyhow::Result<SystemSettings> {
+    // Keep SystemSettings in sync with the active provider so the admin UI reflects the real
+    // backend/model/dimension. This does not, by itself, decide whether a re-embed is needed.
+    let (settings, _changed) =
         SystemSettings::sync_from_embedding_provider(&services.db, &services.embedding_provider)
             .await
             .context("sync system settings from embedding provider")?;

-    if dimensions_changed {
-        re_embed_all(
-            &services.db,
-            &services.embedding_provider,
-            settings.embedding_dimensions,
-        )
-        .await?;
-    }
+    let target_dim = services.embedding_provider.dimension();
+    let stored_dim = embedding_index_dimension(&services.db)
+        .await
+        .context("inspect stored embedding index dimension")?;
+    // Only a present-and-different stored dimension counts as a mismatch; `None` does not.
+    let mismatch = matches!(stored_dim, Some(dim) if dim != target_dim);

-    ensure_runtime(
-        &services.db,
-        settings.embedding_dimensions as usize,
-    )
-    .await
-    .context("ensure runtime indexes")?;
+    let index_dim = if mismatch {
+        match role {
+            EmbeddingRuntimeRole::Maintainer => {
+                // NOTE(review): `reconcile_embeddings` also returns `Ok(())` when it skips because
+                // a peer holds the re-embed lock, so this arm still ensures indexes at
+                // `target_dim` in that case — confirm that is safe while the peer's re-embed is
+                // still in flight.
+                reconcile_embeddings(&services.db, &services.embedding_provider, target_dim).await?;
+                target_dim
+            }
+            EmbeddingRuntimeRole::ReadOnly => {
+                warn!(
+                    stored_dimension = stored_dim,
+                    target_dimension = target_dim,
+                    "Stored embeddings do not match the active embedding dimension. A maintainer \
+                     (worker) must re-embed; serving in a degraded state and keeping indexes \
+                     aligned to the existing data until then."
+                );
+                // Preserve the index that matches the vectors actually stored. Do not overwrite it
+                // to the new dimension here — that would happen before the data is re-embedded and
+                // would break retrieval entirely.
+                // `mismatch` implies `stored_dim` is `Some`, so the fallback is defensive only.
+                stored_dim.unwrap_or(target_dim)
+            }
+        }
+    } else {
+        target_dim
+    };
+
+    ensure_runtime(&services.db, index_dim)
+        .await
+        .context("ensure runtime indexes")?;

     Ok(settings)
 }

+/// Acquires the re-embed lock (so only one maintainer reconciles), re-embeds, then releases it.
+///
+/// Returns `Ok(())` without re-embedding when the lock could not be taken. Otherwise the lock is
+/// released whether or not the re-embed succeeded, and the re-embed's result is returned.
+///
+/// NOTE(review): the skip path and the completed path both return `Ok(())`, so the caller cannot
+/// tell whether the stored vectors now match `target_dim`. The lock is also released before the
+/// caller rebuilds indexes — confirm a peer starting in that window cannot re-embed a second time.
+///
+/// # Errors
+///
+/// Returns an error if acquiring the lock fails or the re-embed itself fails.
+async fn reconcile_embeddings(
+    db: &SurrealDbClient,
+    embedding_provider: &EmbeddingProvider,
+    target_dim: usize,
+) -> anyhow::Result<()> {
+    // A fresh token per acquisition, so the release below only removes the lock this call took.
+    let owner = reembed_lock_owner();
+
+    if !try_acquire_reembed_lock(db, &owner).await? {
+        info!("Another maintainer holds the re-embed lock; skipping re-embed on this instance");
+        return Ok(());
+    }
+
+    // Hold the result instead of using `?` so the lock is released on the error path too.
+    let result = reconcile_under_lock(db, embedding_provider, target_dim).await;
+    release_reembed_lock(db, &owner).await;
+    result
+}
+
+/// Re-embed body executed while holding the lock, with a re-check to avoid duplicate work.
+///
+/// Re-embeds only when a stored index dimension is present and differs from `target_dim`; a
+/// missing (`None`) or already-matching stored dimension returns `Ok(())` without re-embedding.
+///
+/// # Errors
+///
+/// Returns an error if the stored dimension cannot be inspected, if `target_dim` does not fit in
+/// a `u32`, or if the re-embed fails.
+async fn reconcile_under_lock(
+    db: &SurrealDbClient,
+    embedding_provider: &EmbeddingProvider,
+    target_dim: usize,
+) -> anyhow::Result<()> {
+    // A peer may have finished re-embedding between detection and lock acquisition.
+    let stored_dim = embedding_index_dimension(db)
+        .await
+        .context("re-check stored embedding dimension under lock")?;
+    if !matches!(stored_dim, Some(dim) if dim != target_dim) {
+        info!("Stored embeddings already match the active dimension; skipping re-embed");
+        return Ok(());
+    }
+
+    // `re_embed_all` takes the dimension as `u32`; convert with a check instead of truncating.
+    let target_dim_u32 = u32::try_from(target_dim)
+        .map_err(|_| anyhow::anyhow!("embedding dimension {target_dim} exceeds u32::MAX"))?;
+    re_embed_all(db, embedding_provider, target_dim_u32).await
+}
+
 async fn re_embed_all(
    db: &SurrealDbClient,
    embedding_provider: &EmbeddingProvider,
@@ -52,15 +144,112 @@ async fn re_embed_all(
    );

    info!("Re-embedding TextChunks");
-    TextChunk::update_all_embeddings_with_provider(db, embedding_provider)
+    TextChunk::update_all_embeddings(db, embedding_provider)
        .await
        .context("re-embed text chunks after embedding dimension change")?;

    info!("Re-embedding KnowledgeEntities");
-    KnowledgeEntity::update_all_embeddings_with_provider(db, embedding_provider)
+    KnowledgeEntity::update_all_embeddings(db, embedding_provider)
        .await
        .context("re-embed knowledge entities after embedding dimension change")?;

    info!("Re-embedding complete");
    Ok(())
 }
+
+/// A process-unique token identifying this re-embed lock acquisition (for release).
+///
+/// The token has the form `reembed-<pid>-<nanos>`, where `<nanos>` is the current time in
+/// nanoseconds since the Unix epoch, or `0` if the system clock reads earlier than the epoch.
+fn reembed_lock_owner() -> String {
+    let nanos = SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .map_or(0, |d| d.as_nanos());
+    format!("reembed-{}-{nanos}", std::process::id())
+}
+
+/// Best-effort atomic mutex over the (potentially long) re-embed using a singleton record.
+///
+/// `CREATE` of a fixed record id fails if it already exists, which serializes concurrent
+/// maintainers. An expired lock is reaped first so a crashed maintainer cannot block forever.
+///
+/// Returns `Ok(true)` when this caller created the lock record and `Ok(false)` when the `CREATE`
+/// statement failed. A `CREATE` failure that does not look like ordinary contention is logged as
+/// a warning so that it is not mistaken for a peer holding the lock.
+///
+/// # Errors
+///
+/// Returns an error if defining the lock table or reaping an expired lock fails.
+async fn try_acquire_reembed_lock(db: &SurrealDbClient, owner: &str) -> anyhow::Result<bool> {
+    db.client
+        .query("DEFINE TABLE IF NOT EXISTS maintenance_lock SCHEMALESS;")
+        .await
+        .and_then(surrealdb::Response::check)
+        .context("define maintenance_lock table")?;
+
+    db.client
+        .query("DELETE maintenance_lock:reembed WHERE expires_at < time::now();")
+        .await
+        .and_then(surrealdb::Response::check)
+        .context("reap expired re-embed lock")?;
+
+    // `CREATE` of a fixed record id succeeds for the first caller and errors with an
+    // "already exists" record conflict for any concurrent caller, giving us an atomic mutex.
+    let created = db
+        .client
+        .query(format!(
+            "CREATE maintenance_lock:reembed SET owner = $owner, expires_at = time::now() + {REEMBED_LOCK_TTL};"
+        ))
+        .bind(("owner", owner.to_string()))
+        .await
+        .and_then(surrealdb::Response::check);
+
+    match created {
+        Ok(_) => Ok(true),
+        Err(err) => {
+            // Stay best-effort (never fail startup here), but do not swallow the reason: only an
+            // "already exists" conflict is expected contention. The message check affects the
+            // log output only, never the returned value.
+            if !err.to_string().contains("already exists") {
+                warn!(
+                    error = %err,
+                    "Failed to create the re-embed lock record for a reason other than \
+                     contention; treating the lock as unavailable"
+                );
+            }
+            Ok(false)
+        }
+    }
+}
+
+/// Releases the re-embed lock if it is still owned by `owner`.
+///
+/// The delete is filtered on `owner`, so a lock record carrying a different owner token is left
+/// untouched. Failures are logged as a warning and not returned to the caller.
+async fn release_reembed_lock(db: &SurrealDbClient, owner: &str) {
+    let released = db
+        .client
+        .query("DELETE maintenance_lock:reembed WHERE owner = $owner;")
+        .bind(("owner", owner.to_string()))
+        .await
+        .and_then(surrealdb::Response::check);
+
+    if let Err(err) = released {
+        warn!(error = %err, "Failed to release re-embed lock; it will expire automatically");
+    }
+}
+
+#[cfg(test)]
+#[allow(clippy::expect_used)]
+mod tests {
+    use super::*;
+    use common::storage::db::SurrealDbClient;
+
+    /// Opens an in-memory database under a freshly generated database name.
+    async fn test_db() -> SurrealDbClient {
+        SurrealDbClient::memory("reembed_lock_ns", &reembed_lock_owner())
+            .await
+            .expect("in-memory db")
+    }
+
+    /// The lock is granted to one owner at a time, only its holder can release it, and it can be
+    /// taken again once released.
+    #[tokio::test]
+    async fn reembed_lock_is_exclusive_and_reusable_after_release() {
+        let db = test_db().await;
+
+        let first = reembed_lock_owner();
+        assert!(
+            try_acquire_reembed_lock(&db, &first)
+                .await
+                .expect("acquire first"),
+            "the first acquirer should win the lock"
+        );
+
+        // A second, concurrent maintainer must not be able to take a held lock.
+        let second = format!("{first}-peer");
+        assert!(
+            !try_acquire_reembed_lock(&db, &second)
+                .await
+                .expect("contend for lock"),
+            "a held lock must not be granted to another owner"
+        );
+
+        // A release attempted by a non-holder must be a no-op: the lock stays with `first`.
+        release_reembed_lock(&db, &second).await;
+        assert!(
+            !try_acquire_reembed_lock(&db, &second)
+                .await
+                .expect("contend after foreign release"),
+            "releasing with a non-matching owner must not free the lock"
+        );
+
+        // Releasing it (only the holder can) frees it for the next maintainer.
+        release_reembed_lock(&db, &first).await;
+        assert!(
+            try_acquire_reembed_lock(&db, &second)
+                .await
+                .expect("re-acquire after release"),
+            "the lock should be grantable again once released"
+        );
+    }
+}