perf: offload blocking calls to spawn_blocking

- Move the blocking headless_chrome PDF rasterization out of the async
  context and into spawn_blocking, keeping tokio worker threads responsive.
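- Switch RerankerPool from tokio::sync::Mutex to std::sync::Mutex
  and run TextRerank::rerank inside spawn_blocking: the rerank call
  is CPU-bound with no .await points, so the guard is acquired and
  released entirely within the blocking closure and is never held
  across an await.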
This commit is contained in:
Per Stark
2026-05-26 15:30:03 +02:00
parent 1927149ce9
commit 6c7b586fc5
2 changed files with 47 additions and 31 deletions
+11 -7
View File
@@ -3,14 +3,14 @@ use std::{
path::{Path, PathBuf},
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
Arc, Mutex,
},
thread::available_parallelism,
};
use common::{error::AppError, utils::config::AppConfig};
use fastembed::{RerankInitOptions, RerankResult, TextRerank};
use tokio::sync::{Mutex, OwnedSemaphorePermit, Semaphore};
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
use tracing::debug;
static NEXT_ENGINE: AtomicUsize = AtomicUsize::new(0);
@@ -161,11 +161,15 @@ impl RerankerLease {
query: &str,
documents: Vec<String>,
) -> Result<Vec<RerankResult>, AppError> {
// Lock this specific engine so we get &mut TextRerank
let mut guard = self.engine.lock().await;
let query = query.to_owned();
let engine = Arc::clone(&self.engine);
guard
.rerank(query.to_owned(), documents, false, None)
.map_err(|e| AppError::InternalError(e.to_string()))
tokio::task::spawn_blocking(move || {
let mut guard = engine.lock().expect("reranker engine mutex poisoned");
guard
.rerank(query, documents, false, None)
.map_err(|e| AppError::InternalError(e.to_string()))
})
.await?
}
}