mirror of
https://github.com/perstarkse/minne.git
synced 2026-06-22 06:29:33 +02:00
feat: pool fastembed, batch embeddings, and reconcile embedding config on startup
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
mod startup;
|
||||
pub mod wiring;
|
||||
|
||||
pub use startup::prepare_embedding_runtime;
|
||||
pub use startup::{prepare_embedding_runtime, EmbeddingRuntimeRole};
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
|
||||
+210
-21
@@ -2,7 +2,7 @@ use anyhow::Context;
|
||||
use common::{
|
||||
storage::{
|
||||
db::SurrealDbClient,
|
||||
indexes::ensure_runtime,
|
||||
indexes::{embedding_index_dimension, ensure_runtime},
|
||||
types::{
|
||||
knowledge_entity::KnowledgeEntity, system_settings::SystemSettings,
|
||||
text_chunk::TextChunk,
|
||||
@@ -10,37 +10,129 @@ use common::{
|
||||
},
|
||||
utils::embedding::EmbeddingProvider,
|
||||
};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use super::SharedServices;
|
||||
|
||||
/// Syncs embedding settings, re-embeds stored vectors when dimensions change, and
|
||||
/// ensures runtime indexes match the active embedding dimension.
|
||||
pub async fn prepare_embedding_runtime(services: &SharedServices) -> anyhow::Result<SystemSettings> {
|
||||
let (settings, dimensions_changed) =
|
||||
/// How a process participates in embedding-runtime maintenance.
|
||||
///
|
||||
/// Embedding configuration changes (model/dimension) take effect on restart: the active
|
||||
/// [`EmbeddingProvider`] is built once at startup, so the stored vectors must be reconciled to it
|
||||
/// before indexes are rebuilt. Only a single maintainer should perform that (potentially long,
|
||||
/// destructive) re-embed; query-only servers stay read-only to avoid racing it.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
// Each binary (main/worker/server) constructs only one variant, so the other looks dead within
|
||||
// that single compilation unit even though both are used across the binary set.
|
||||
#[allow(dead_code)]
|
||||
pub enum EmbeddingRuntimeRole {
|
||||
/// Combined binary or worker: re-embeds stored data when it no longer matches the provider.
|
||||
Maintainer,
|
||||
/// Server-only: never mutates stored embeddings; aligns indexes to the data that exists.
|
||||
ReadOnly,
|
||||
}
|
||||
|
||||
/// Re-embed lock TTL. Generously sized so a slow re-embed of a large corpus never expires
|
||||
/// out from under the maintainer that holds it; an abandoned lock (crashed maintainer) self-heals.
|
||||
const REEMBED_LOCK_TTL: &str = "30m";
|
||||
|
||||
/// Reconciles embeddings with the active provider and ensures runtime indexes are ready.
|
||||
///
|
||||
/// Detection is based on the stored chunk-embedding HNSW index dimension (a persisted marker of
|
||||
/// the embedding space actually in the database). When it differs from the active provider's
|
||||
/// dimension, a [`EmbeddingRuntimeRole::Maintainer`] re-embeds before indexes are (re)built;
|
||||
/// a [`EmbeddingRuntimeRole::ReadOnly`] server leaves indexes aligned to the existing data and
|
||||
/// serves in a degraded state until a maintainer reconciles.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error if syncing settings, inspecting/building indexes, or re-embedding fails.
|
||||
pub async fn prepare_embedding_runtime(
|
||||
services: &SharedServices,
|
||||
role: EmbeddingRuntimeRole,
|
||||
) -> anyhow::Result<SystemSettings> {
|
||||
// Keep SystemSettings in sync with the active provider so the admin UI reflects the real
|
||||
// backend/model/dimension. This does not, by itself, decide whether a re-embed is needed.
|
||||
let (settings, _changed) =
|
||||
SystemSettings::sync_from_embedding_provider(&services.db, &services.embedding_provider)
|
||||
.await
|
||||
.context("sync system settings from embedding provider")?;
|
||||
|
||||
if dimensions_changed {
|
||||
re_embed_all(
|
||||
&services.db,
|
||||
&services.embedding_provider,
|
||||
settings.embedding_dimensions,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
let target_dim = services.embedding_provider.dimension();
|
||||
let stored_dim = embedding_index_dimension(&services.db)
|
||||
.await
|
||||
.context("inspect stored embedding index dimension")?;
|
||||
let mismatch = matches!(stored_dim, Some(dim) if dim != target_dim);
|
||||
|
||||
ensure_runtime(
|
||||
&services.db,
|
||||
settings.embedding_dimensions as usize,
|
||||
)
|
||||
.await
|
||||
.context("ensure runtime indexes")?;
|
||||
let index_dim = if mismatch {
|
||||
match role {
|
||||
EmbeddingRuntimeRole::Maintainer => {
|
||||
reconcile_embeddings(&services.db, &services.embedding_provider, target_dim).await?;
|
||||
target_dim
|
||||
}
|
||||
EmbeddingRuntimeRole::ReadOnly => {
|
||||
warn!(
|
||||
stored_dimension = stored_dim,
|
||||
target_dimension = target_dim,
|
||||
"Stored embeddings do not match the active embedding dimension. A maintainer \
|
||||
(worker) must re-embed; serving in a degraded state and keeping indexes \
|
||||
aligned to the existing data until then."
|
||||
);
|
||||
// Preserve the index that matches the vectors actually stored. Do not overwrite it
|
||||
// to the new dimension here — that would happen before the data is re-embedded and
|
||||
// would break retrieval entirely.
|
||||
stored_dim.unwrap_or(target_dim)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
target_dim
|
||||
};
|
||||
|
||||
ensure_runtime(&services.db, index_dim)
|
||||
.await
|
||||
.context("ensure runtime indexes")?;
|
||||
|
||||
Ok(settings)
|
||||
}
|
||||
|
||||
/// Acquires the re-embed lock (so only one maintainer reconciles), re-embeds, then releases it.
|
||||
async fn reconcile_embeddings(
|
||||
db: &SurrealDbClient,
|
||||
embedding_provider: &EmbeddingProvider,
|
||||
target_dim: usize,
|
||||
) -> anyhow::Result<()> {
|
||||
let owner = reembed_lock_owner();
|
||||
|
||||
if !try_acquire_reembed_lock(db, &owner).await? {
|
||||
info!("Another maintainer holds the re-embed lock; skipping re-embed on this instance");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let result = reconcile_under_lock(db, embedding_provider, target_dim).await;
|
||||
release_reembed_lock(db, &owner).await;
|
||||
result
|
||||
}
|
||||
|
||||
/// Re-embed body executed while holding the lock, with a re-check to avoid duplicate work.
|
||||
async fn reconcile_under_lock(
|
||||
db: &SurrealDbClient,
|
||||
embedding_provider: &EmbeddingProvider,
|
||||
target_dim: usize,
|
||||
) -> anyhow::Result<()> {
|
||||
// A peer may have finished re-embedding between detection and lock acquisition.
|
||||
let stored_dim = embedding_index_dimension(db)
|
||||
.await
|
||||
.context("re-check stored embedding dimension under lock")?;
|
||||
if !matches!(stored_dim, Some(dim) if dim != target_dim) {
|
||||
info!("Stored embeddings already match the active dimension; skipping re-embed");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let target_dim_u32 = u32::try_from(target_dim)
|
||||
.map_err(|_| anyhow::anyhow!("embedding dimension {target_dim} exceeds u32::MAX"))?;
|
||||
re_embed_all(db, embedding_provider, target_dim_u32).await
|
||||
}
|
||||
|
||||
async fn re_embed_all(
|
||||
db: &SurrealDbClient,
|
||||
embedding_provider: &EmbeddingProvider,
|
||||
@@ -52,15 +144,112 @@ async fn re_embed_all(
|
||||
);
|
||||
|
||||
info!("Re-embedding TextChunks");
|
||||
TextChunk::update_all_embeddings_with_provider(db, embedding_provider)
|
||||
TextChunk::update_all_embeddings(db, embedding_provider)
|
||||
.await
|
||||
.context("re-embed text chunks after embedding dimension change")?;
|
||||
|
||||
info!("Re-embedding KnowledgeEntities");
|
||||
KnowledgeEntity::update_all_embeddings_with_provider(db, embedding_provider)
|
||||
KnowledgeEntity::update_all_embeddings(db, embedding_provider)
|
||||
.await
|
||||
.context("re-embed knowledge entities after embedding dimension change")?;
|
||||
|
||||
info!("Re-embedding complete");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// A process-unique token identifying this re-embed lock acquisition (for release).
|
||||
fn reembed_lock_owner() -> String {
|
||||
let nanos = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map_or(0, |d| d.as_nanos());
|
||||
format!("reembed-{}-{nanos}", std::process::id())
|
||||
}
|
||||
|
||||
/// Best-effort atomic mutex over the (potentially long) re-embed using a singleton record.
|
||||
///
|
||||
/// `CREATE` of a fixed record id fails if it already exists, which serializes concurrent
|
||||
/// maintainers. An expired lock is reaped first so a crashed maintainer cannot block forever.
|
||||
async fn try_acquire_reembed_lock(db: &SurrealDbClient, owner: &str) -> anyhow::Result<bool> {
|
||||
db.client
|
||||
.query("DEFINE TABLE IF NOT EXISTS maintenance_lock SCHEMALESS;")
|
||||
.await
|
||||
.and_then(surrealdb::Response::check)
|
||||
.context("define maintenance_lock table")?;
|
||||
|
||||
db.client
|
||||
.query("DELETE maintenance_lock:reembed WHERE expires_at < time::now();")
|
||||
.await
|
||||
.and_then(surrealdb::Response::check)
|
||||
.context("reap expired re-embed lock")?;
|
||||
|
||||
// `CREATE` of a fixed record id succeeds for the first caller and errors with an
|
||||
// "already exists" record conflict for any concurrent caller, giving us an atomic mutex.
|
||||
let acquired = db
|
||||
.client
|
||||
.query(format!(
|
||||
"CREATE maintenance_lock:reembed SET owner = $owner, expires_at = time::now() + {REEMBED_LOCK_TTL};"
|
||||
))
|
||||
.bind(("owner", owner.to_string()))
|
||||
.await
|
||||
.and_then(surrealdb::Response::check)
|
||||
.is_ok();
|
||||
|
||||
Ok(acquired)
|
||||
}
|
||||
|
||||
async fn release_reembed_lock(db: &SurrealDbClient, owner: &str) {
|
||||
let released = db
|
||||
.client
|
||||
.query("DELETE maintenance_lock:reembed WHERE owner = $owner;")
|
||||
.bind(("owner", owner.to_string()))
|
||||
.await
|
||||
.and_then(surrealdb::Response::check);
|
||||
|
||||
if let Err(err) = released {
|
||||
warn!(error = %err, "Failed to release re-embed lock; it will expire automatically");
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::expect_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use common::storage::db::SurrealDbClient;
|
||||
|
||||
async fn test_db() -> SurrealDbClient {
|
||||
SurrealDbClient::memory("reembed_lock_ns", &reembed_lock_owner())
|
||||
.await
|
||||
.expect("in-memory db")
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reembed_lock_is_exclusive_and_reusable_after_release() {
|
||||
let db = test_db().await;
|
||||
|
||||
let first = reembed_lock_owner();
|
||||
assert!(
|
||||
try_acquire_reembed_lock(&db, &first)
|
||||
.await
|
||||
.expect("acquire first"),
|
||||
"the first acquirer should win the lock"
|
||||
);
|
||||
|
||||
// A second, concurrent maintainer must not be able to take a held lock.
|
||||
let second = format!("{first}-peer");
|
||||
assert!(
|
||||
!try_acquire_reembed_lock(&db, &second)
|
||||
.await
|
||||
.expect("contend for lock"),
|
||||
"a held lock must not be granted to another owner"
|
||||
);
|
||||
|
||||
// Releasing it (only the holder can) frees it for the next maintainer.
|
||||
release_reembed_lock(&db, &first).await;
|
||||
assert!(
|
||||
try_acquire_reembed_lock(&db, &second)
|
||||
.await
|
||||
.expect("re-acquire after release"),
|
||||
"the lock should be grantable again once released"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user