release: 1.0.5

This commit is contained in:
Per Stark
2026-06-24 22:02:31 +02:00
parent ba3fd6ed46
commit d273390de8
118 changed files with 989 additions and 690 deletions
+1 -1
View File
@@ -25,7 +25,7 @@ jobs:
with:
primary-key: nix-${{ runner.os }}-${{ hashFiles('**/*.nix', '**/flake.lock', 'Cargo.lock') }}
restore-prefixes-first-match: nix-${{ runner.os }}-
gc-max-store-size-linux: 5G
gc-max-store-size-linux: 10G
- name: Check formatting, clippy lint, unit tests & ort version
run: nix flake check --show-trace
+17 -7
View File
@@ -28,7 +28,7 @@ jobs:
with:
primary-key: nix-${{ runner.os }}-${{ hashFiles('**/*.nix', '**/flake.lock', 'Cargo.lock') }}
restore-prefixes-first-match: nix-${{ runner.os }}-
gc-max-store-size-linux: 5G
gc-max-store-size-linux: 10G
- name: Read ORT version from flake
id: ort_version
@@ -37,6 +37,10 @@ jobs:
- name: Run nix flake check
run: nix flake check --system x86_64-linux
- name: Warm Linux release outputs for cache
if: ${{ !github.event.pull_request }}
run: nix build .#minne-release .#minne-release-windows --no-link -L
build-nix-artifacts:
name: build (${{ matrix.triple }})
needs: [ci]
@@ -49,10 +53,10 @@ jobs:
triple: x86_64-unknown-linux-gnu
nix_package: minne-release
cache_save: false
- runner: macos-latest
- runner: macos-14
triple: aarch64-apple-darwin
nix_package: minne-release
cache_save: true
cache_save: false
- runner: ubuntu-24.04
triple: x86_64-pc-windows-msvc
nix_package: minne-release-windows
@@ -70,8 +74,7 @@ jobs:
with:
primary-key: nix-${{ runner.os }}-${{ hashFiles('**/*.nix', '**/flake.lock', 'Cargo.lock') }}
restore-prefixes-first-match: nix-${{ runner.os }}-
gc-max-store-size-linux: 5G
gc-max-store-size-darwin: 5G
gc-max-store-size-linux: 10G
save: ${{ matrix.cache_save }}
- name: Build release archive (Nix)
@@ -88,7 +91,14 @@ jobs:
ARTIFACT="main-${TRIPLE}.tar.xz"
fi
RELEASE="$(nix path-info ./minne-release)"
cp "$RELEASE/${ARTIFACT}" "$ARTIFACT"
echo "Release output at $RELEASE:"
ls -la "$RELEASE"
mapfile -t BUILT < <(find "$RELEASE" -maxdepth 1 \( -name 'main-*.tar.xz' -o -name 'main-*.zip' \) -print)
if [ "${#BUILT[@]}" -ne 1 ]; then
echo "Expected exactly one release archive in $RELEASE" >&2
exit 1
fi
cp "${BUILT[0]}" "$ARTIFACT"
if command -v sha256sum >/dev/null; then
sha256sum "$ARTIFACT" > "${ARTIFACT}.sha256"
else
@@ -124,7 +134,7 @@ jobs:
with:
primary-key: nix-${{ runner.os }}-${{ hashFiles('**/*.nix', '**/flake.lock', 'Cargo.lock') }}
restore-prefixes-first-match: nix-${{ runner.os }}-
gc-max-store-size-linux: 5G
gc-max-store-size-linux: 10G
save: false
- name: Build Docker image with Nix
+3 -1
View File
@@ -2,6 +2,9 @@
## Unreleased
## 1.0.5 (2026-06-24)
- Infra: CI workflow fixes. CI is now a nix flake check which includes compilation, caching and running tests, clippy, fmt, validation for ort version.
- Docker-compose: The example now references the ghcr image, this is so we can remove the Dockerfile and reducing maintenance scope.
- Refactor: web scraping now uses `servo-fetch` (pure-Rust Servo engine) and PDF rendering uses `pdfium-render` (direct PDFium bindings) — reduces Docker image size by ~300MB, improves startup latency by ~100× for PDF rendering, and provides more stable output
@@ -10,7 +13,6 @@
- Docs: updated architecture, features, and installation docs to reflect the new web processing stack
- Fix: added pre-commit hooks to further maintain code consistency.
- Security: updated some deps because dependabot told me, good bot.
- Security: bump `async-openai` to 0.41.1 (feature-gated types, transcription API rename; removes `backoff` transitive dep)
- Refactor: deduplicated test database setup across common/src/storage/.
- Refactor: split knowledge-graph.js monolith into focused functions.
- Evaluations: simplified crate layout — linear pipeline, sharded-only converted store, in-memory ingestion, `db/` and `cli/` modules; namespace reuse state in corpus manifest (removed `cache/snapshots/`); no legacy JSON/history compatibility (re-run `--warm` after upgrade)
Generated
+1 -1
View File
@@ -5852,7 +5852,7 @@ checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30"
[[package]]
name = "main"
version = "1.0.4"
version = "1.0.5"
dependencies = [
"anyhow",
"api-router",
+1 -1
View File
@@ -1,7 +1,7 @@
[package]
name = "api-router"
version = "0.1.0"
edition = "2021"
edition = "2024"
license = "AGPL-3.0-or-later"
[lints]
+1 -1
View File
@@ -1,7 +1,7 @@
use axum::{
Json,
http::StatusCode,
response::{IntoResponse, Response},
Json,
};
use common::error::AppError;
use serde::Serialize;
+1 -1
View File
@@ -1,9 +1,9 @@
use api_state::ApiState;
use axum::{
Router,
extract::{DefaultBodyLimit, FromRef},
middleware::from_fn_with_state,
routing::{get, post},
Router,
};
use middleware_api_auth::api_auth;
use routes::{categories::list, ingest::handle, liveness::live, readiness::ready};
+1 -1
View File
@@ -1,4 +1,4 @@
use axum::{extract::State, response::IntoResponse, Extension, Json};
use axum::{Extension, Json, extract::State, response::IntoResponse};
use common::storage::types::user::User;
use crate::{api_state::ApiState, error::ApiErr};
+3 -3
View File
@@ -1,4 +1,4 @@
use axum::{extract::State, http::StatusCode, response::IntoResponse, Extension, Json};
use axum::{Extension, Json, extract::State, http::StatusCode, response::IntoResponse};
use axum_typed_multipart::{FieldData, TryFromMultipart, TypedMultipart};
use common::{
error::AppError,
@@ -6,9 +6,9 @@ use common::{
file_info::FileInfo, ingestion_payload::IngestionPayload, ingestion_task::IngestionTask,
user::User,
},
utils::ingest_limits::{validate_ingest_input, IngestValidationError},
utils::ingest_limits::{IngestValidationError, validate_ingest_input},
};
use futures::{future::try_join_all, TryFutureExt};
use futures::{TryFutureExt, future::try_join_all};
use serde_json::json;
use tempfile::NamedTempFile;
use tracing::info;
+1 -1
View File
@@ -1,4 +1,4 @@
use axum::{http::StatusCode, response::IntoResponse, Json};
use axum::{Json, http::StatusCode, response::IntoResponse};
use serde_json::json;
/// Liveness probe: always returns 200 to indicate the process is running.
+1 -1
View File
@@ -1,4 +1,4 @@
use axum::{extract::State, http::StatusCode, response::IntoResponse, Json};
use axum::{Json, extract::State, http::StatusCode, response::IntoResponse};
use serde_json::json;
use tracing::error;
+2 -2
View File
@@ -4,9 +4,9 @@ use std::sync::Arc;
use api_router::{api_routes_v1, api_state::ApiState};
use axum::{
body::{to_bytes, Body},
http::{Request, StatusCode},
Router,
body::{Body, to_bytes},
http::{Request, StatusCode},
};
use common::{
storage::{db::SurrealDbClient, store::StorageManager, types::user::User},
+1 -1
View File
@@ -1,7 +1,7 @@
[package]
name = "common"
version = "0.1.0"
edition = "2021"
edition = "2024"
license = "AGPL-3.0-or-later"
[lints]
+4 -4
View File
@@ -3,14 +3,14 @@ use crate::error::AppError;
use axum_session::{SessionConfig, SessionError, SessionStore};
use axum_session_surreal::SessionSurrealPool;
use futures::Stream;
use include_dir::{include_dir, Dir};
use serde::de::DeserializeOwned;
use include_dir::{Dir, include_dir};
use serde::Serialize;
use serde::de::DeserializeOwned;
use std::{ops::Deref, sync::Arc};
use surrealdb::{
engine::any::{connect, Any},
opt::auth::{Namespace, Root},
Error, Notification, Surreal,
engine::any::{Any, connect},
opt::auth::{Namespace, Root},
};
use surrealdb_migrations::MigrationRunner;
use tracing::debug;
+82 -55
View File
@@ -2,14 +2,14 @@ use std::io::ErrorKind;
use std::path::{Component, Path, PathBuf};
use std::sync::Arc;
use anyhow::{anyhow, Context, Result as AnyResult};
use anyhow::{Context, Result as AnyResult, anyhow};
use bytes::Bytes;
use futures::stream::BoxStream;
use futures::{StreamExt, TryStreamExt};
use object_store::aws::AmazonS3Builder;
use object_store::local::LocalFileSystem;
use object_store::memory::InMemory;
use object_store::{path::Path as ObjPath, ObjectStore};
use object_store::{ObjectStore, path::Path as ObjPath};
use crate::utils::config::{AppConfig, StorageKind};
@@ -461,9 +461,12 @@ pub mod testing {
pub async fn new_s3() -> object_store::Result<Self> {
// Ensure credentials are set for MinIO
// We set these env vars for the process, which AmazonS3Builder will pick up
std::env::set_var("AWS_ACCESS_KEY_ID", "minioadmin");
std::env::set_var("AWS_SECRET_ACCESS_KEY", "minioadmin");
std::env::set_var("AWS_REGION", "us-east-1");
// SAFETY: test setup runs before concurrent S3 client use in this process.
unsafe {
std::env::set_var("AWS_ACCESS_KEY_ID", "minioadmin");
std::env::set_var("AWS_SECRET_ACCESS_KEY", "minioadmin");
std::env::set_var("AWS_REGION", "us-east-1");
}
let cfg = test_config_s3();
let storage = StorageManager::new(&cfg).await?;
@@ -543,10 +546,10 @@ pub mod testing {
impl Drop for TestStorageManager {
fn drop(&mut self) {
// Clean up temporary directories for local storage
if let Some((_, path)) = &self.temp_dir {
if path.exists() {
let _ = std::fs::remove_dir_all(path);
}
if let Some((_, path)) = &self.temp_dir
&& path.exists()
{
let _ = std::fs::remove_dir_all(path);
}
}
}
@@ -690,20 +693,24 @@ mod tests {
assert_eq!(retrieved.as_ref(), data);
// Test exists
assert!(storage
.exists(location)
.await
.with_context(|| "exists check".to_string())?);
assert!(
storage
.exists(location)
.await
.with_context(|| "exists check".to_string())?
);
// Test delete
storage
.delete_prefix("test/data/")
.await
.with_context(|| "delete".to_string())?;
assert!(!storage
.exists(location)
.await
.with_context(|| "exists check after delete".to_string())?);
assert!(
!storage
.exists(location)
.await
.with_context(|| "exists check after delete".to_string())?
);
Ok(())
}
@@ -741,20 +748,24 @@ mod tests {
.with_context(|| "object directory exists after write".to_string())?;
// Test exists
assert!(storage
.exists(location)
.await
.with_context(|| "exists check".to_string())?);
assert!(
storage
.exists(location)
.await
.with_context(|| "exists check".to_string())?
);
// Test delete
storage
.delete_prefix("test/data/")
.await
.with_context(|| "delete".to_string())?;
assert!(!storage
.exists(location)
.await
.with_context(|| "exists check after delete".to_string())?);
assert!(
!storage
.exists(location)
.await
.with_context(|| "exists check after delete".to_string())?
);
assert!(
tokio::fs::metadata(&object_dir).await.is_err(),
"object directory should be removed"
@@ -846,12 +857,16 @@ mod tests {
.await
.with_context(|| "list dir1".to_string())?;
assert_eq!(dir1_files.len(), 2);
assert!(dir1_files
.iter()
.any(|meta| meta.location.as_ref().contains("file1.txt")));
assert!(dir1_files
.iter()
.any(|meta| meta.location.as_ref().contains("file2.txt")));
assert!(
dir1_files
.iter()
.any(|meta| meta.location.as_ref().contains("file1.txt"))
);
assert!(
dir1_files
.iter()
.any(|meta| meta.location.as_ref().contains("file2.txt"))
);
// Test listing non-existent prefix
let empty_files = storage
@@ -918,10 +933,12 @@ mod tests {
.with_context(|| "get".to_string())?;
assert_eq!(retrieved.as_ref(), data);
assert!(storage
.exists(location)
.await
.with_context(|| "exists".to_string())?);
assert!(
storage
.exists(location)
.await
.with_context(|| "exists".to_string())?
);
assert_eq!(*storage.backend_kind(), StorageKind::Memory);
Ok(())
@@ -975,10 +992,12 @@ mod tests {
assert_eq!(retrieved.as_ref(), data);
// Test existence check
assert!(test_storage
.exists(location)
.await
.with_context(|| "exists".to_string())?);
assert!(
test_storage
.exists(location)
.await
.with_context(|| "exists".to_string())?
);
// Test list
let files = test_storage
@@ -992,10 +1011,12 @@ mod tests {
.delete_prefix("test/storage/")
.await
.with_context(|| "delete".to_string())?;
assert!(!test_storage
.exists(location)
.await
.with_context(|| "exists after delete".to_string())?);
assert!(
!test_storage
.exists(location)
.await
.with_context(|| "exists after delete".to_string())?
);
Ok(())
}
@@ -1019,10 +1040,12 @@ mod tests {
.with_context(|| "get".to_string())?;
assert_eq!(retrieved.as_ref(), data);
assert!(test_storage
.exists(location)
.await
.with_context(|| "exists".to_string())?);
assert!(
test_storage
.exists(location)
.await
.with_context(|| "exists".to_string())?
);
Ok(())
}
@@ -1119,20 +1142,24 @@ mod tests {
assert_eq!(retrieved.as_ref(), data);
// Test exists
assert!(storage
.exists(&location)
.await
.with_context(|| "exists".to_string())?);
assert!(
storage
.exists(&location)
.await
.with_context(|| "exists".to_string())?
);
// Test delete
storage
.delete_prefix(&format!("{prefix}/"))
.await
.with_context(|| "delete".to_string())?;
assert!(!storage
.exists(&location)
.await
.with_context(|| "exists after delete".to_string())?);
assert!(
!storage
.exists(&location)
.await
.with_context(|| "exists after delete".to_string())?
);
Ok(())
}
+1 -1
View File
@@ -1,4 +1,4 @@
use crate::storage::types::{user::User, StoredObject};
use crate::storage::types::{StoredObject, user::User};
use crate::utils::serde_helpers::deserialize_flexible_id;
use serde::{Deserialize, Serialize};
+4 -3
View File
@@ -315,9 +315,10 @@ impl IngestionTask {
"#;
debug_assert!(lifecycle::pending().reserve().is_ok());
debug_assert!(lifecycle::pending().reserve().is_ok_and(|m| m
.start_processing()
.is_ok_and(|m| m.fail().is_ok_and(|m| m.reserve().is_ok()))));
debug_assert!(lifecycle::pending().reserve().is_ok_and(|m| {
m.start_processing()
.is_ok_and(|m| m.fail().is_ok_and(|m| m.reserve().is_ok()))
}));
let mut result = db
.client
+15 -9
View File
@@ -399,7 +399,9 @@ impl KnowledgeEntity {
if embedding.len() != new_dimensions {
let err_msg = format!(
"CRITICAL: Generated embedding for entity {} has incorrect dimension ({}). Expected {}. Aborting.",
entity.id, embedding.len(), new_dimensions
entity.id,
embedding.len(),
new_dimensions
);
error!("{err_msg}");
return Err(AppError::internal(err_msg));
@@ -864,14 +866,18 @@ mod tests {
let rid_e1 = surrealdb::RecordId::from_table_key(KnowledgeEntity::table_name(), &e1.id);
let rid_e2 = surrealdb::RecordId::from_table_key(KnowledgeEntity::table_name(), &e2.id);
assert!(KnowledgeEntityEmbedding::get_by_record_id(&db, &rid_e1)
.await
.with_context(|| "get embedding e1".to_string())?
.is_some());
assert!(KnowledgeEntityEmbedding::get_by_record_id(&db, &rid_e2)
.await
.with_context(|| "get embedding e2".to_string())?
.is_some());
assert!(
KnowledgeEntityEmbedding::get_by_record_id(&db, &rid_e1)
.await
.with_context(|| "get embedding e1".to_string())?
.is_some()
);
assert!(
KnowledgeEntityEmbedding::get_by_record_id(&db, &rid_e2)
.await
.with_context(|| "get embedding e2".to_string())?
.is_some()
);
let results = KnowledgeEntity::vector_search(2, &[0.0, 1.0, 0.0], &db, &user_id)
.await
@@ -287,10 +287,12 @@ mod tests {
.with_context(|| "get entity2 embedding after delete".to_string())?
.is_none()
);
assert!(KnowledgeEntityEmbedding::get_by_record_id(&db, &other_rid)
.await
.with_context(|| "get other embedding after delete".to_string())?
.is_some());
assert!(
KnowledgeEntityEmbedding::get_by_record_id(&db, &other_rid)
.await
.with_context(|| "get other embedding after delete".to_string())?
.is_some()
);
Ok(())
}
@@ -575,12 +575,16 @@ mod tests {
KnowledgeRelationship::delete_relationships_by_source_id(shared_source, user_a, &db)
.await?;
assert!(get_relationship_by_id(&owner_relationship_id, &db)
.await
.is_none());
assert!(get_relationship_by_id(&other_relationship_id, &db)
.await
.is_some());
assert!(
get_relationship_by_id(&owner_relationship_id, &db)
.await
.is_none()
);
assert!(
get_relationship_by_id(&other_relationship_id, &db)
.await
.is_some()
);
Ok(())
}
+12 -12
View File
@@ -223,16 +223,16 @@ impl SystemSettings {
needs_update = true;
}
if let Some(model) = provider_model {
if settings.embedding_model != model {
tracing::info!(
old_model = %settings.embedding_model,
new_model = %model,
"Embedding model changed, updating SystemSettings"
);
settings.embedding_model = model;
needs_update = true;
}
if let Some(model) = provider_model
&& settings.embedding_model != model
{
tracing::info!(
old_model = %settings.embedding_model,
new_model = %model,
"Embedding model changed, updating SystemSettings"
);
settings.embedding_model = model;
needs_update = true;
}
if needs_update {
@@ -719,8 +719,8 @@ mod tests {
}
#[tokio::test]
async fn test_should_change_embedding_length_on_indexes_when_switching_length(
) -> anyhow::Result<()> {
async fn test_should_change_embedding_length_on_indexes_when_switching_length()
-> anyhow::Result<()> {
use crate::utils::embedding::EmbeddingProvider;
let db = setup_test_db().await?;
+4 -2
View File
@@ -4,7 +4,7 @@ use std::fmt::Write;
use crate::storage::indexes::hnsw_index_overwrite_sql;
use crate::storage::types::{
text_chunk_embedding::TextChunkEmbedding, EmbeddingRecord, HasEmbedding,
EmbeddingRecord, HasEmbedding, text_chunk_embedding::TextChunkEmbedding,
};
use crate::utils::embedding::RE_EMBED_BATCH_SIZE;
use crate::{error::AppError, storage::db::SurrealDbClient, stored_object};
@@ -216,7 +216,9 @@ impl TextChunk {
if embedding.len() != new_dimensions {
let err_msg = format!(
"CRITICAL: Generated embedding for chunk {} has incorrect dimension ({}). Expected {}. Aborting.",
chunk.id, embedding.len(), new_dimensions
chunk.id,
embedding.len(),
new_dimensions
);
error!("{err_msg}");
return Err(AppError::internal(err_msg));
@@ -235,35 +235,47 @@ mod tests {
.with_context(|| format!("store embedding for {key}"))?;
}
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk1_rid)
.await
.with_context(|| "get chunk1".to_string())?
.is_some());
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk2_rid)
.await
.with_context(|| "get chunk2".to_string())?
.is_some());
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk_other_rid)
.await
.with_context(|| "get chunk_other".to_string())?
.is_some());
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk1_rid)
.await
.with_context(|| "get chunk1".to_string())?
.is_some()
);
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk2_rid)
.await
.with_context(|| "get chunk2".to_string())?
.is_some()
);
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk_other_rid)
.await
.with_context(|| "get chunk_other".to_string())?
.is_some()
);
TextChunkEmbedding::delete_by_source_id(source_id, &db)
.await
.with_context(|| "Failed to delete by source_id".to_string())?;
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk1_rid)
.await
.with_context(|| "check chunk1".to_string())?
.is_none());
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk2_rid)
.await
.with_context(|| "check chunk2".to_string())?
.is_none());
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk_other_rid)
.await
.with_context(|| "check chunk_other".to_string())?
.is_some());
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk1_rid)
.await
.with_context(|| "check chunk1".to_string())?
.is_none()
);
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk2_rid)
.await
.with_context(|| "check chunk2".to_string())?
.is_none()
);
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk_other_rid)
.await
.with_context(|| "check chunk_other".to_string())?
.is_some()
);
Ok(())
}
+3 -3
View File
@@ -1,8 +1,8 @@
use std::collections::{HashMap, HashSet};
use std::str::FromStr;
use surrealdb::opt::PatchOp;
use surrealdb::RecordId;
use surrealdb::opt::PatchOp;
use uuid::Uuid;
use crate::{error::AppError, storage::db::SurrealDbClient, stored_object};
@@ -682,8 +682,8 @@ mod tests {
}
#[tokio::test]
async fn clear_ingested_children_removes_chunks_entities_and_relationships(
) -> anyhow::Result<()> {
async fn clear_ingested_children_removes_chunks_entities_and_relationships()
-> anyhow::Result<()> {
let db = setup_test_db().await?;
let user_id = "clear-user";
let source_id = Uuid::new_v4().to_string();
+2 -2
View File
@@ -3,7 +3,7 @@ use anyhow::anyhow;
use async_trait::async_trait;
use axum_session_auth::Authentication;
use chrono_tz::Tz;
use surrealdb::{engine::any::Any, Surreal};
use surrealdb::{Surreal, engine::any::Any};
use uuid::Uuid;
use super::text_chunk::TextChunk;
@@ -729,7 +729,7 @@ mod tests {
use super::*;
use crate::storage::types::ingestion_payload::IngestionPayload;
use crate::storage::types::ingestion_task::{IngestionTask, TaskState, MAX_ATTEMPTS};
use crate::storage::types::ingestion_task::{IngestionTask, MAX_ATTEMPTS, TaskState};
use std::collections::HashSet;
use crate::test_utils::setup_test_db;
+2 -2
View File
@@ -8,8 +8,8 @@ use crate::storage::{
db::SurrealDbClient,
indexes::{ensure_runtime, rebuild},
types::{
knowledge_entity_embedding::KnowledgeEntityEmbedding, system_settings::SystemSettings,
text_chunk_embedding::TextChunkEmbedding, EmbeddingRecord,
EmbeddingRecord, knowledge_entity_embedding::KnowledgeEntityEmbedding,
system_settings::SystemSettings, text_chunk_embedding::TextChunkEmbedding,
},
};
+8 -2
View File
@@ -198,7 +198,10 @@ pub fn ensure_ort_path() {
exe.join("lib").join("onnxruntime.dll"),
] {
if p.exists() {
env::set_var("ORT_DYLIB_PATH", p);
// SAFETY: `Once` ensures this runs on a single thread during startup.
unsafe {
env::set_var("ORT_DYLIB_PATH", p);
}
return;
}
}
@@ -210,7 +213,10 @@ pub fn ensure_ort_path() {
};
let p = exe.join("lib").join(name);
if p.exists() {
env::set_var("ORT_DYLIB_PATH", p);
// SAFETY: `Once` ensures this runs on a single thread during startup.
unsafe {
env::set_var("ORT_DYLIB_PATH", p);
}
}
});
}
+3 -4
View File
@@ -9,7 +9,7 @@ use std::{
use serde::Serialize;
use tracing::warn;
use async_openai::{types::embeddings::CreateEmbeddingRequestArgs, Client};
use async_openai::{Client, types::embeddings::CreateEmbeddingRequestArgs};
use fastembed::{EmbeddingModel, ModelTrait, TextEmbedding, TextInitOptions};
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
@@ -588,9 +588,8 @@ mod tests {
#![allow(clippy::expect_used)]
use super::{
align_fastembed_system_settings, fastembed_model_dimension,
list_fastembed_embedding_models, resolve_fastembed_model_code, EmbeddingError,
DEFAULT_FASTEMBED_MODEL_CODE,
DEFAULT_FASTEMBED_MODEL_CODE, EmbeddingError, align_fastembed_system_settings,
fastembed_model_dimension, list_fastembed_embedding_models, resolve_fastembed_model_code,
};
use crate::storage::types::system_settings::SystemSettings;
use crate::utils::config::{AppConfig, EmbeddingBackend, ParseEmbeddingBackendError};
+7 -7
View File
@@ -47,13 +47,13 @@ pub fn validate_ingest_input(
)));
}
if let Some(content) = content {
if content.len() > config.ingest_max_content_bytes {
return Err(IngestValidationError::PayloadTooLarge(format!(
"content is too large: maximum allowed is {} bytes",
config.ingest_max_content_bytes
)));
}
if let Some(content) = content
&& content.len() > config.ingest_max_content_bytes
{
return Err(IngestValidationError::PayloadTooLarge(format!(
"content is too large: maximum allowed is {} bytes",
config.ingest_max_content_bytes
)));
}
if ctx.len() > config.ingest_max_context_bytes {
+1 -1
View File
@@ -1,4 +1,4 @@
pub use minijinja::{path_loader, Environment, Value};
pub use minijinja::{Environment, Value, path_loader};
pub use minijinja_autoreload::AutoReloader;
pub use minijinja_contrib;
pub use minijinja_embed;
+288
View File
@@ -0,0 +1,288 @@
# CI/CD Roadmap: Nix-First Release Builds
This document tracks the migration from cargo-dist raw `cargo build --release` on bare GitHub runners to Nix-built release artifacts for all platforms. The goal is a single build system (the flake) shared by CI, Docker, and release binaries.
**Status:** Phase 34 complete locally — Nix builds all release targets including Windows cross (`nix build .#minne-release-windows` verified on x86_64-linux). cargo-dist removed from workflow and devenv. GHA tag-push validation pending.
**Decision (2026-06-23):** Drop `x86_64-apple-darwin` (Intel macOS). Ship `aarch64-apple-darwin` only; Intel Mac users can run via Rosetta 2.
---
## Executive Summary
Nix is now the sole compiler for all release binaries. Per-platform `minne-release` flake outputs produce archives compatible with GitHub Releases layout (binaries + `lib/libonnxruntime.*` + docs). The release workflow uses matrix jobs running `nix build` with `cache-nix-action` on every job. cargo-dist has been removed; releases use `gh release create` with CHANGELOG-driven notes.
This fixes the mozangle/clang failure at the root: the flake already wires `libclang`, `bindgenHook`, `llvm`, `python3`, `fontconfig`, and `MOZJS_ARCHIVE` — cargo-dist on bare Ubuntu cannot see any of that without duplicating it in apt/workflow steps.
---
## Current State
### What works
- [x] CI (`nix flake check`) — format, clippy, tests, ort-version gate via Crane `buildDepsOnly`
- [x] Release Docker job — `nix build .#dockerImage`, push to GHCR with dynamic tag from `docker load`
- [x] Release plan job — `nix flake check`, ORT version from flake (no cargo-dist)
- [x] Harmonized native deps in `flake.nix` for CI/Docker (openssl, libglvnd, onnxruntime, fontconfig, bindgen, mozjs)
### What is broken or painful
- [x] ~~cargo-dist Linux build fails without apt/mozjs workarounds~~ — resolved: Nix builds all platforms
- [x] ~~Two build systems: Nix for CI/Docker, cargo + apt/homebrew for dist binaries~~ — resolved: Nix-only release
- [x] ~~Four independent release compiles with no shared Nix store across jobs~~ — resolved: `cache-nix-action` on all release jobs
- [x] ~~`[dist.dependencies.apt]` duplicates flake.nix logic~~ — resolved: `dist-workspace.toml` deleted
- [ ] Release compile time on GHA not yet measured post-migration (expected ~1025 min warm vs ~50110 min cold)
- [ ] GHA tag-push validation pending for macOS and Windows archives
---
## Target Architecture
| Layer | Owner | Notes |
|-------|-------|-------|
| Compile binaries | **Nix** (`minne-pkg` / cross derivations) | Crane + `commonArgs`, per-platform mozjs |
| Bundle ORT + runtime libs | **Nix** (`minne-release`) | Match `include = ["lib"]` layout |
| Create archives | **Nix** or thin shell | `.tar.xz` (Unix), `.zip` (Windows) |
| Publish GitHub Release | **`gh release create`** | CHANGELOG body |
| Docker image | **Nix** (unchanged) | Shares `minne-pkg` derivation with Linux release |
| cargo-dist | **Removed** | Replaced by Nix jobs + `gh release` |
### Release targets (end state)
| Target | Builder | Nix output |
|--------|---------|------------|
| `x86_64-unknown-linux-gnu` | `ubuntu-22.04` native | `.#minne-release` |
| `aarch64-apple-darwin` | `macos-latest` native | `.#minne-release` |
| ~~`x86_64-apple-darwin`~~ | **Dropped** | — |
| `x86_64-pc-windows-msvc` | `ubuntu-22.04` cross | `.#minne-release-windows` |
---
## Per-Platform Build Matrix
| Target | Feasibility | Nix command | Artifact layout | Blockers |
|--------|-------------|-------------|-----------------|----------|
| `x86_64-unknown-linux-gnu` | Ready with modest flake changes | `nix build .#minne-release --system x86_64-linux` | `main-{ver}-x86_64-unknown-linux-gnu.tar.xz``main/`, `server/`, `worker/`, `lib/libonnxruntime.so`, README, LICENSE, CHANGELOG | glibc 2.40 (nixpkgs-unstable) vs Ubuntu 22.04 glibc 2.35; portable runtime bundling needed |
| `aarch64-apple-darwin` | Feasible | `nix build .#minne-release --system aarch64-darwin` | `main-{ver}-aarch64-apple-darwin.tar.xz` + `lib/libonnxruntime.dylib` | Per-system mozjs URL; Darwin `postInstall` assumes Linux today |
| `x86_64-pc-windows-msvc` | Feasible with new cross flake | `nix build .#minne-release-windows` (x86_64-linux host) | `main-{ver}-x86_64-pc-windows-msvc.zip` + `lib/onnxruntime.dll` | Crane + cargo-xwin cross setup; no native Nix-on-Windows for v1 |
### mozjs prebuilt availability (mozjs-sys-v140.10.1-0)
Confirmed for all release triples:
- `libmozjs-x86_64-unknown-linux-gnu.tar.gz`
- `libmozjs-aarch64-apple-darwin.tar.gz`
- `libmozjs-x86_64-pc-windows-msvc.tar.gz`
---
## Caching Strategy
| Layer | Invalidated by | Shared across |
|-------|----------------|---------------|
| Nix store (system deps) | `flake.lock`, `*.nix`, `Cargo.lock` | plan, CI, Docker, all release jobs (per OS) |
| `cargoArtifacts` (`buildDepsOnly`) | `Cargo.lock` dep changes only | minne-pkg, clippy, test, dockerImage, release |
| `minne-pkg` (source) | Application source changes | dockerImage, release |
| cargo-dist `target/` | Version bumps (weak) | Removed — Nix store replaces it |
### Expected release times
| Scenario | Current (cargo-dist) | After migration (Nix) |
|----------|---------------------|-------------------------|
| Cold release (version bump, no cache) | ~50110 min × 4 jobs | ~4590 min × 3 jobs (no Intel Mac) |
| Warm release (source-only, cache hit) | Still ~full rebuild | ~1025 min incremental per OS |
| No-op re-release | Full rebuild | ~25 min if derivations unchanged |
| Docker job (cached) | ~515 min | Unchanged; shares `minne-pkg` with Linux release |
`buildDepsOnly` survives version bumps (version is in flake `minneVersion`, not `Cargo.lock`) — major win over cargo-dist.
---
## Implementation Phases
### Phase 1 — Linux via Nix (highest pain, highest value)
- [x] Add per-system `mozjsArchive` helper with hash map (at minimum fix structure for all platforms)
- [x] Add `nix/minne-release.nix` — bundle ORT + portable runtime libs + docs into archive
- [x] Linux portable runtime lib bundling + `patchelf --set-rpath '$ORIGIN/lib'`
- [x] Replace Linux `build-local-artifacts` steps with `nix build .#minne-release`
- [x] Add `cache-nix-action` to Linux release build job
- [x] Validate glibc portability (test binary on Ubuntu 22.04)
- [x] Remove Linux apt/mozjs/ORT curl workarounds from `.github/workflows/release.yml`
- [x] Archive naming matches prior releases (`main-{triple}.tar.xz`, no version in filename)
### Phase 2 — macOS aarch64
- [x] Platform-conditional `postInstall` in `flake.nix` (Darwin vs Linux wrapping)
- [x] Add `nix/minne-release-darwin.nix` — ORT + runtime dylibs + docs archive
- [x] macOS `build-local-artifacts` uses `nix build .#minne-release` on `macos-latest`
- [x] `cache-nix-action` on macOS release build job
- [x] Drop `x86_64-apple-darwin` target (was in `dist-workspace.toml`, now deleted)
- [ ] Test archive on clean macOS VM / GHA release run
- [x] Update `docs/installation.md` to note aarch64-only macOS binary (Rosetta 2 for Intel Macs)
### Phase 3 — Windows cross from Linux
- [x] Add `minne-release-windows` cross derivation (Crane + cargo-xwin)
- [x] Add `nix/clang-cl-msvc-link-wrapper.sh` for mozangle DLL links under clang-cl
- [x] Windows GHA job on `ubuntu-22.04` (cross-build, not Nix-on-Windows)
- [x] Bundle `onnxruntime.dll` in release zip (match cargo-dist flat layout)
- [x] Fenix `rust-std` for `x86_64-pc-windows-msvc` via `fenix.combine`
- [x] Local cross-build verified: `nix build .#minne-release-windows` on x86_64-linux
- [ ] Test archive on Windows VM / GHA release run
### Phase 4 — Cleanup
- [x] Remove cargo-dist compile steps from release workflow
- [x] Delete `dist-workspace.toml`
- [x] Simplify CI to `nix flake check` only (drop `cargo-dist plan`)
- [x] Replace `host`/cargo-dist with `gh release create` + CHANGELOG
- [x] Remove `pkgs.cargo-dist` from `devenv.nix`
- [x] Update `AGENTS.md` release checklist
- [ ] Update README release badges/docs if workflow structure changes
---
## Flake Changes (outline)
New/modified outputs:
```nix
# Per-system mozjs (replace hardcoded Linux x86_64)
mozjsTarget = { "x86_64-linux" = "x86_64-unknown-linux-gnu"; ... }.${system};
mozjsArchive = pkgs.fetchurl { url = ".../libmozjs-${mozjsTarget}.tar.gz"; hash = mozjsHashes.${system}; };
# Platform-conditional postInstall (Linux LD_LIBRARY_PATH vs Darwin)
# NEW: release archive derivation
packages.minne-release = callPackage ./nix/minne-release.nix { inherit minne-pkg minneVersion ortVersion; };
# NEW: Windows cross (x86_64-linux host only)
packages.minne-release-windows = ...;
```
New file: `nix/minne-release.nix` — copies stripped binaries, stages `lib/libonnxruntime.{so,dylib}`, optional runtime `.so` copies, includes README/LICENSE/CHANGELOG, builds `.tar.xz` / `.zip`.
Optional: `devShells.dist` for local release-build debugging.
---
## Workflow Changes (outline)
Target `release.yml` structure:
```
plan:
- nix flake check, nix eval ortVersion
- output tag from github.ref (no hardcoded versions)
build-nix-artifacts: # replaces build-local-artifacts
matrix: linux | macos-aarch64 | windows-cross
- determinate-nix + cache-nix-action on ALL jobs
- nix build .#${attr} --system ${system} -L
- upload: main-*-{triple}.tar.xz / .zip
build_and_push_docker_image: # unchanged
release: # replaces build-global-artifacts + host
- download artifacts
- gh release create with CHANGELOG body
```
Artifact naming: match current convention for backwards compatibility — `main-{version}-{triple}.tar.xz` (Unix) / `.zip` (Windows).
---
## cargo-dist Fate
**Status:** Removed (Option B implemented in Phase 4).
| Option | Verdict |
|--------|---------|
| A) Nix builds → cargo-dist packages only | No clean skip-compile mode; high friction |
| **B) Replace with custom Nix jobs + `gh release`** | **Implemented** |
| C) `build-local-artifacts = false` + custom jobs | Experimental; superseded by Option B |
---
## Task Checklist (with complexity)
| # | Task | Size | Phase | Done |
|---|------|------|-------|------|
| 1 | `mozjsArchive` per `system` with hash map | S | 1 | [x] |
| 2 | Platform-conditional `postInstall` in flake | S | 12 | [x] |
| 3 | `nix/minne-release.nix` archive bundler | M | 1 | [x] |
| 4 | Linux portable runtime lib bundling + patchelf | M | 1 | [x] |
| 5 | Replace Linux `build-local-artifacts` with Nix job | S | 1 | [x] |
| 6 | Add `cache-nix-action` to all release build jobs | S | 13 | [x] |
| 7 | glibc portability test + fix | M | 1 | [x] |
| 8 | Darwin release bundle + macOS GHA job | M | 2 | [x] |
| 9 | Drop `x86_64-apple-darwin` from targets | S | 2 | [x] |
| 10 | Windows cross flake (`minne-release-windows`) | L | 3 | [x] |
| 11 | Windows GHA job | S | 3 | [x] |
| 12 | Replace `host`/cargo-dist with `gh release` | S | 4 | [x] |
| 13 | Remove apt deps, ORT curl, cargo-dist install | S | 4 | [x] |
| 14 | Update docs/AGENTS release checklist | S | 4 | [x] |
S = hours1 day, M = 24 days, L = 12 weeks
---
## Risks & Blockers
| Risk | Severity | Mitigation | Resolved |
|------|----------|------------|----------|
| glibc compatibility (nixpkgs 2.40 vs Ubuntu 22.04 2.35) | High | Bundle runtime libs in `lib/` + `LD_LIBRARY_PATH` wrappers; bundled glibc interpreter | [x] |
| mozjs per-platform hashes drift on `Cargo.lock` bump | Medium | Centralize in `mozjsHashes` attrset; document bump procedure | [ ] |
| Darwin `postInstall` assumes Linux (`LD_LIBRARY_PATH`, `libglvnd`) | Medium | Platform-conditional wrapping in flake | [x] |
| Windows cross complexity (Crane + cargo-xwin) | MediumHigh | cargo-xwin env + clang-cl wrapper for mozangle; Dbghelp.lib case symlink | [x] |
| Nix on macOS GHA speed | Medium | cache-nix-action; larger runner if needed | [ ] |
| Codesigning / notarization (macOS) | Low | Not required for CLI today; document `xattr` workaround; revisit if needed | [ ] |
| musl target (`x86_64-unknown-linux-musl`) | N/A | mozjs/servo stack is glibc-oriented; stay on `*-linux-gnu` unless explicitly requested | [ ] |
| ORT version drift | Low | Existing `ortVersion` gate in flake + devenv | [x] |
---
## Open Questions
1. **glibc portability strategy** — Bundle runtime libs in `lib/` (preferred for portability) vs pin `nixpkgs` to an older release channel for release builds vs document minimum distro? Need a test matrix: Ubuntu 22.04, Debian 12, Fedora current.
2. **Archive format** — Confirmed: `.tar.xz` (Unix), `.zip` (Windows); naming `main-{triple}.*` (no version in filename).
3. **Binary scope** — Release all three binaries (`main`, `server`, `worker`) in one archive per platform (unchanged from prior cargo-dist behavior).
4. **PR artifact builds** — Not implemented; cargo-dist `pr-run-mode` was disabled. Revisit if PR smoke-test artifacts are wanted.
5. **Cachix** — Deferred; `cache-nix-action` on all release jobs is sufficient for now.
6. **Windows cross approach** — Resolved: Crane + offline xwin MSVC cache + fenix `rust-std` + clang-cl/lld-link shims (`nix build .#minne-release-windows` verified locally).
7. **Version source of truth** — Release workflow reads version from flake (`minneVersion`).
8. **cargo-dist removal timing** — Resolved: removed in Phase 4.
9. **Intel Mac deprecation communication** — Done: `docs/installation.md` notes aarch64-only + Rosetta 2.
---
## Success Criteria
After implementation:
- [x] Release workflow no longer runs raw `cargo build --release` on bare GitHub runners
- [x] Native deps (clang, mozjs, onnxruntime, etc.) come from flake/Nix, not apt
- [x] Linux, macOS (aarch64), and Windows release binaries are produced via Nix
- [x] Docker and release binaries share maximum Nix store cache (`cache-nix-action` on all jobs)
- [x] No hardcoded version strings in `release.yml`
- [ ] Warm release compile time materially improved (~1025 min/platform vs ~50110 min today) — pending GHA measurement
- [ ] macOS and Windows archives validated on clean VM / GHA tag-push release run
---
## References
- [Crane cross-windows example](https://crane.dev/examples/cross-windows.html)
- [Crane discussion: MSVC / cargo-xwin](https://github.com/ipetkov/crane/discussions/555)
- [cargo-dist CI customization](https://axodotdev.github.io/cargo-dist/book/ci/customizing.html)
- [servo/mozjs releases](https://github.com/servo/mozjs/releases)
- Project files: `flake.nix`, `.github/workflows/release.yml`, `devenv.nix`, `nix/minne-release*.nix`
+1 -1
View File
@@ -1,7 +1,7 @@
[package]
name = "evaluations"
version = "0.1.0"
edition = "2021"
edition = "2024"
[lints]
workspace = true
+45 -46
View File
@@ -3,7 +3,7 @@ use std::{
path::{Path, PathBuf},
};
use anyhow::{anyhow, Context, Result};
use anyhow::{Context, Result, anyhow};
use clap::{Args, Parser, ValueEnum};
use crate::datasets::DatasetKind;
@@ -394,26 +394,26 @@ impl Config {
));
}
if let Some(k) = self.retrieval.chunk_rrf_k {
if k <= 0.0 || !k.is_finite() {
return Err(anyhow!(
"--chunk-rrf-k must be a positive, finite number (got {k})"
));
}
if let Some(k) = self.retrieval.chunk_rrf_k
&& (k <= 0.0 || !k.is_finite())
{
return Err(anyhow!(
"--chunk-rrf-k must be a positive, finite number (got {k})"
));
}
if let Some(weight) = self.retrieval.chunk_rrf_vector_weight {
if weight < 0.0 || !weight.is_finite() {
return Err(anyhow!(
"--chunk-rrf-vector-weight must be a non-negative, finite number (got {weight})"
));
}
if let Some(weight) = self.retrieval.chunk_rrf_vector_weight
&& (weight < 0.0 || !weight.is_finite())
{
return Err(anyhow!(
"--chunk-rrf-vector-weight must be a non-negative, finite number (got {weight})"
));
}
if let Some(weight) = self.retrieval.chunk_rrf_fts_weight {
if weight < 0.0 || !weight.is_finite() {
return Err(anyhow!(
"--chunk-rrf-fts-weight must be a non-negative, finite number (got {weight})"
));
}
if let Some(weight) = self.retrieval.chunk_rrf_fts_weight
&& (weight < 0.0 || !weight.is_finite())
{
return Err(anyhow!(
"--chunk-rrf-fts-weight must be a non-negative, finite number (got {weight})"
));
}
if self.concurrency == 0 {
@@ -426,16 +426,16 @@ impl Config {
));
}
if let Some(query_model) = &self.query_model {
if query_model.trim().is_empty() {
return Err(anyhow!("--query-model requires a non-empty model name"));
}
if let Some(query_model) = &self.query_model
&& query_model.trim().is_empty()
{
return Err(anyhow!("--query-model requires a non-empty model name"));
}
if let Some(grow) = self.slice_grow {
if grow == 0 {
return Err(anyhow!("--slice-grow must be greater than zero"));
}
if let Some(grow) = self.slice_grow
&& grow == 0
{
return Err(anyhow!("--slice-grow must be greater than zero"));
}
if self.negative_multiplier <= 0.0 || !self.negative_multiplier.is_finite() {
@@ -465,12 +465,11 @@ impl Config {
}
// Handle perf log dir env var fallback
if self.perf_log_dir.is_none() {
if let Ok(dir) = env::var("EVAL_PERF_LOG_DIR") {
if !dir.trim().is_empty() {
self.perf_log_dir = Some(PathBuf::from(dir));
}
}
if self.perf_log_dir.is_none()
&& let Ok(dir) = env::var("EVAL_PERF_LOG_DIR")
&& !dir.trim().is_empty()
{
self.perf_log_dir = Some(PathBuf::from(dir));
}
Ok(())
@@ -480,10 +479,10 @@ impl Config {
let catalog = crate::datasets::catalog()?;
let entry = catalog.dataset(self.dataset.id())?;
if self.slice.is_none() {
if let Some(default_slice) = entry.slices.first() {
self.slice = Some(default_slice.id.clone());
}
if self.slice.is_none()
&& let Some(default_slice) = entry.slices.first()
{
self.slice = Some(default_slice.id.clone());
}
let Some(slice_id) = self.slice.as_deref() else {
@@ -498,11 +497,11 @@ impl Config {
return Ok(());
}
if let Some(limit) = slice.limit {
if self.limit_arg == 200 {
self.limit_arg = limit;
self.limit = Some(limit);
}
if let Some(limit) = slice.limit
&& self.limit_arg == 200
{
self.limit_arg = limit;
self.limit = Some(limit);
}
if self.corpus_limit.is_none() {
self.corpus_limit = slice.corpus_limit;
@@ -514,10 +513,10 @@ impl Config {
self.llm_mode = include_unanswerable;
self.retrieval.require_verified_chunks = !include_unanswerable;
}
if let Some(multiplier) = slice.negative_multiplier {
if negative_multiplier_is_default(self.negative_multiplier) {
self.negative_multiplier = multiplier;
}
if let Some(multiplier) = slice.negative_multiplier
&& negative_multiplier_is_default(self.negative_multiplier)
{
self.negative_multiplier = multiplier;
}
Ok(())
}
+2 -2
View File
@@ -9,8 +9,8 @@ use crate::{
args::Config,
corpus::{self, CorpusCacheConfig},
datasets::{
beir_subset_store_summary, beir_subset_stores_ready, content_checksum_for_layout,
detect_layout, mix_content_checksum, store_dir_for, ConvertedLayout, DatasetKind,
ConvertedLayout, DatasetKind, beir_subset_store_summary, beir_subset_stores_ready,
content_checksum_for_layout, detect_layout, mix_content_checksum, store_dir_for,
},
db::{connect_eval_db, default_database, default_namespace, namespace_has_corpus},
slice::{self, ledger_target},
+3 -2
View File
@@ -8,8 +8,9 @@ pub use orchestrator::{
load_cached_manifest, persist_corpus_manifest,
};
pub use store::{
seed_manifest_into_db, window_manifest, CorpusHandle, CorpusManifest, CorpusMetadata,
CorpusQuestion, NamespaceSeedRecord, ParagraphShard, ParagraphShardStore, MANIFEST_VERSION,
CorpusHandle, CorpusManifest, CorpusMetadata, CorpusQuestion, MANIFEST_VERSION,
NamespaceSeedRecord, ParagraphShard, ParagraphShardStore, seed_manifest_into_db,
window_manifest,
};
pub fn make_ingestion_config(config: &crate::args::Config) -> ingestion_pipeline::IngestionConfig {
+3 -3
View File
@@ -6,14 +6,14 @@ use std::{
sync::Arc,
};
use anyhow::{anyhow, Context, Result};
use anyhow::{Context, Result, anyhow};
use async_openai::Client;
use chrono::Utc;
use common::{
storage::{
db::SurrealDbClient,
store::{DynStorage, StorageManager},
types::{ingestion_payload::IngestionPayload, ingestion_task::IngestionTask, StoredObject},
types::{StoredObject, ingestion_payload::IngestionPayload, ingestion_task::IngestionTask},
},
utils::config::{AppConfig, StorageKind},
};
@@ -31,7 +31,7 @@ use crate::{
use crate::corpus::{
CorpusCacheConfig, CorpusHandle, CorpusManifest, CorpusMetadata, CorpusQuestion,
ParagraphShard, ParagraphShardStore, MANIFEST_VERSION,
MANIFEST_VERSION, ParagraphShard, ParagraphShardStore,
};
const INGESTION_SPEC_VERSION: u32 = 2;
+6 -5
View File
@@ -5,16 +5,17 @@ use std::{
path::PathBuf,
};
use anyhow::{anyhow, Context, Result};
use anyhow::{Context, Result, anyhow};
use chrono::{DateTime, Utc};
use common::storage::{
db::SurrealDbClient,
types::{
knowledge_entity::KnowledgeEntity, knowledge_relationship::KnowledgeRelationship,
text_chunk::TextChunk, text_content::TextContent, StoredObject,
StoredObject, knowledge_entity::KnowledgeEntity,
knowledge_relationship::KnowledgeRelationship, text_chunk::TextChunk,
text_content::TextContent,
},
};
use ingestion_pipeline::{persist_artifacts, IngestionTuning, PipelineArtifacts};
use ingestion_pipeline::{IngestionTuning, PipelineArtifacts, persist_artifacts};
use serde::Deserialize;
use tracing::{debug, warn};
@@ -304,7 +305,7 @@ impl ParagraphShardStore {
Ok(file) => file,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
Err(err) => {
return Err(err).with_context(|| format!("opening shard {}", path.display()))
return Err(err).with_context(|| format!("opening shard {}", path.display()));
}
};
let reader = BufReader::new(file);
+5 -5
View File
@@ -5,7 +5,7 @@ use std::{
path::{Path, PathBuf},
};
use anyhow::{anyhow, Context, Result};
use anyhow::{Context, Result, anyhow};
use serde::Deserialize;
use tracing::warn;
@@ -138,10 +138,10 @@ pub fn convert_beir_documents(
continue;
};
if let Some(filter) = doc_ids {
if !filter.contains(&best.doc_id) {
continue;
}
if let Some(filter) = doc_ids
&& !filter.contains(&best.doc_id)
{
continue;
}
let Some(&paragraph_slot) = paragraph_index.get(&best.doc_id) else {
+2 -3
View File
@@ -1,17 +1,16 @@
use std::collections::{HashMap, HashSet};
use anyhow::{anyhow, Context, Result};
use anyhow::{Context, Result, anyhow};
use sha2::{Digest, Sha256};
use tracing::info;
use super::{
beir,
BEIR_DATASETS, ConvertedDataset, DatasetKind, DatasetMetadata, beir,
checksum::hash_file,
store::{
self, build_dataset_from_catalog, paragraph_path, read_meta, store_dir_for,
upsert_sharded_paragraphs, write_sharded,
},
ConvertedDataset, DatasetKind, DatasetMetadata, BEIR_DATASETS,
};
use crate::{args::Config, slice};
+15 -17
View File
@@ -112,10 +112,10 @@ pub fn write_sidecar(content_path: &Path, sha256: &str) -> Result<()> {
#[cfg(test)]
pub fn content_checksum(content_path: &Path) -> Result<String> {
let sidecar_path = ChecksumSidecar::sidecar_path(content_path);
if let Some(sidecar) = read_sidecar(&sidecar_path)? {
if sidecar.is_valid_for(content_path) {
return Ok(sidecar.sha256);
}
if let Some(sidecar) = read_sidecar(&sidecar_path)?
&& sidecar.is_valid_for(content_path)
{
return Ok(sidecar.sha256);
}
let sha256 = hash_file(content_path)?;
write_sidecar(content_path, &sha256)?;
@@ -125,19 +125,17 @@ pub fn content_checksum(content_path: &Path) -> Result<String> {
pub fn store_aggregate_checksum(store_dir: &Path) -> Result<String> {
let marker = store_dir.join("checksum.sha256");
let meta = store_dir.join("meta.json");
if marker.is_file() && meta.is_file() {
if let (Ok(marker_meta), Ok(meta_meta)) = (marker.metadata(), meta.metadata()) {
if marker_meta
.modified()
.ok()
.zip(meta_meta.modified().ok())
.is_some_and(|(marker_modified, meta_modified)| marker_modified >= meta_modified)
{
if let Some(sidecar) = read_sidecar(&marker)? {
return Ok(sidecar.sha256);
}
}
}
if marker.is_file()
&& meta.is_file()
&& let (Ok(marker_meta), Ok(meta_meta)) = (marker.metadata(), meta.metadata())
&& marker_meta
.modified()
.ok()
.zip(meta_meta.modified().ok())
.is_some_and(|(marker_modified, meta_modified)| marker_modified >= meta_modified)
&& let Some(sidecar) = read_sidecar(&marker)?
{
return Ok(sidecar.sha256);
}
let mut entries = Vec::new();
+16 -19
View File
@@ -4,12 +4,11 @@ use anyhow::{Context, Result};
use tracing::info;
use super::{
catalog,
ConvertedDataset, DatasetKind, catalog,
store::{
self, build_dataset_from_catalog, detect_layout, read_meta, store_dir_for, write_sharded,
ConvertedLayout,
self, ConvertedLayout, build_dataset_from_catalog, detect_layout, read_meta, store_dir_for,
write_sharded,
},
ConvertedDataset, DatasetKind,
};
use crate::{
args::Config,
@@ -69,21 +68,19 @@ fn load_from_store(
let meta = read_meta(store_dir)?;
validate_metadata_fields(&meta.metadata, dataset_kind, config)?;
if allow_partial {
if let Some(paragraph_ids) = slice_paragraph_ids_for_fast_path(config)? {
let unique: HashSet<String> = paragraph_ids.into_iter().collect();
info!(
paragraphs = unique.len(),
store = %store_dir.display(),
"Loading slice-addressed paragraphs from sharded converted store"
);
let dataset = build_dataset_from_catalog(store_dir, &unique)?;
return Ok(LoadedDataset {
dataset,
content_checksum: checksum,
partial: true,
});
}
if allow_partial && let Some(paragraph_ids) = slice_paragraph_ids_for_fast_path(config)? {
let unique: HashSet<String> = paragraph_ids.into_iter().collect();
info!(
paragraphs = unique.len(),
store = %store_dir.display(),
"Loading slice-addressed paragraphs from sharded converted store"
);
let dataset = build_dataset_from_catalog(store_dir, &unique)?;
return Ok(LoadedDataset {
dataset,
content_checksum: checksum,
partial: true,
});
}
info!(
+5 -3
View File
@@ -13,7 +13,7 @@ use std::{
str::FromStr,
};
use anyhow::{anyhow, bail, Context, Result};
use anyhow::{Context, Result, anyhow, bail};
use chrono::{DateTime, TimeZone, Utc};
use clap::ValueEnum;
use once_cell::sync::OnceCell;
@@ -226,7 +226,7 @@ pub use beir_mix::{beir_subset_store_summary, beir_subset_stores_ready, mix_cont
pub use checksum::store_aggregate_checksum;
pub use loader::{prebuild_catalog_slices, prepare_dataset};
pub use store::{
content_checksum_for_layout, detect_layout, store_dir_for, write_sharded, ConvertedLayout,
ConvertedLayout, content_checksum_for_layout, detect_layout, store_dir_for, write_sharded,
};
pub fn catalog() -> Result<&'static DatasetCatalog> {
@@ -383,7 +383,9 @@ impl FromStr for DatasetKind {
"scifact" => Ok(Self::Scifact),
"nq-beir" | "natural-questions-beir" => Ok(Self::NqBeir),
other => {
anyhow::bail!("unknown dataset '{other}'. Expected one of: squad, natural-questions, beir, fever, fiqa, hotpotqa, nfcorpus, quora, trec-covid, scifact, nq-beir.")
anyhow::bail!(
"unknown dataset '{other}'. Expected one of: squad, natural-questions, beir, fever, fiqa, hotpotqa, nfcorpus, quora, trec-covid, scifact, nq-beir."
)
}
}
}
+3 -3
View File
@@ -5,14 +5,14 @@ use std::{
path::{Path, PathBuf},
};
use anyhow::{anyhow, Context, Result};
use anyhow::{Context, Result, anyhow};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use tracing::info;
use super::{
checksum::store_aggregate_checksum, ConvertedDataset, ConvertedParagraph, ConvertedQuestion,
DatasetMetadata,
ConvertedDataset, ConvertedParagraph, ConvertedQuestion, DatasetMetadata,
checksum::store_aggregate_checksum,
};
use crate::slice;
+2 -2
View File
@@ -1,10 +1,10 @@
use anyhow::{anyhow, Context, Result};
use anyhow::{Context, Result, anyhow};
use chrono::Utc;
use common::{
storage::{
db::SurrealDbClient,
types::user::{Theme, User},
types::StoredObject,
types::user::{Theme, User},
},
utils::embedding::EmbeddingProvider,
};
+1 -1
View File
@@ -1,6 +1,6 @@
use std::{collections::HashMap, fs, path::Path};
use anyhow::{anyhow, Context, Result};
use anyhow::{Context, Result, anyhow};
use common::storage::{db::SurrealDbClient, types::text_chunk::TextChunk};
use crate::{args::Config, corpus, db::connect_eval_db};
+15 -16
View File
@@ -17,36 +17,35 @@ mod types;
use anyhow::Context;
use tokio::runtime::Builder;
use tracing::info;
use tracing_subscriber::{fmt, EnvFilter};
use tracing_subscriber::{EnvFilter, fmt};
/// Configure `SurrealDB` environment variables for optimal performance
#[allow(clippy::arithmetic_side_effects, clippy::unwrap_used)]
fn configure_surrealdb_performance(cpu_count: usize) {
let indexing_batch_size = std::env::var("SURREAL_INDEXING_BATCH_SIZE")
.unwrap_or_else(|_| (cpu_count * 2).to_string());
std::env::set_var("SURREAL_INDEXING_BATCH_SIZE", indexing_batch_size);
let max_order_queue = std::env::var("SURREAL_MAX_ORDER_LIMIT_PRIORITY_QUEUE_SIZE")
.unwrap_or_else(|_| (cpu_count * 4).to_string());
std::env::set_var(
"SURREAL_MAX_ORDER_LIMIT_PRIORITY_QUEUE_SIZE",
max_order_queue,
);
let websocket_concurrent = std::env::var("SURREAL_WEBSOCKET_MAX_CONCURRENT_REQUESTS")
.unwrap_or_else(|_| cpu_count.to_string());
std::env::set_var(
"SURREAL_WEBSOCKET_MAX_CONCURRENT_REQUESTS",
websocket_concurrent,
);
let websocket_buffer = std::env::var("SURREAL_WEBSOCKET_RESPONSE_BUFFER_SIZE")
.unwrap_or_else(|_| (cpu_count * 8).to_string());
std::env::set_var("SURREAL_WEBSOCKET_RESPONSE_BUFFER_SIZE", websocket_buffer);
let transaction_cache = std::env::var("SURREAL_TRANSACTION_CACHE_SIZE")
.unwrap_or_else(|_| (cpu_count * 16).to_string());
std::env::set_var("SURREAL_TRANSACTION_CACHE_SIZE", transaction_cache);
// SAFETY: single-threaded setup before SurrealDB clients are created.
unsafe {
std::env::set_var("SURREAL_INDEXING_BATCH_SIZE", indexing_batch_size);
std::env::set_var(
"SURREAL_MAX_ORDER_LIMIT_PRIORITY_QUEUE_SIZE",
max_order_queue,
);
std::env::set_var(
"SURREAL_WEBSOCKET_MAX_CONCURRENT_REQUESTS",
websocket_concurrent,
);
std::env::set_var("SURREAL_WEBSOCKET_RESPONSE_BUFFER_SIZE", websocket_buffer);
std::env::set_var("SURREAL_TRANSACTION_CACHE_SIZE", transaction_cache);
}
info!(
indexing_batch_size = %std::env::var("SURREAL_INDEXING_BATCH_SIZE").unwrap(),
+1 -1
View File
@@ -1,7 +1,7 @@
use std::sync::Arc;
use anyhow::{Context, Result};
use async_openai::{config::OpenAIConfig, Client};
use async_openai::{Client, config::OpenAIConfig};
const DEFAULT_BASE_URL: &str = "https://api.openai.com/v1";
+1 -1
View File
@@ -4,7 +4,7 @@ use std::{
time::{Duration, Instant},
};
use anyhow::{anyhow, Result};
use anyhow::{Result, anyhow};
use async_openai::Client;
use common::{
storage::{
+6 -6
View File
@@ -16,12 +16,12 @@ pub(crate) async fn finalize(ctx: &mut EvaluationContext<'_>) -> anyhow::Result<
);
let started = Instant::now();
if let Some(path) = ctx.diagnostics_path.as_ref() {
if ctx.diagnostics_enabled {
write_chunk_diagnostics(path.as_path(), &ctx.diagnostics_output)
.await
.with_context(|| format!("writing chunk diagnostics to {}", path.display()))?;
}
if let Some(path) = ctx.diagnostics_path.as_ref()
&& ctx.diagnostics_enabled
{
write_chunk_diagnostics(path.as_path(), &ctx.diagnostics_output)
.await
.with_context(|| format!("writing chunk diagnostics to {}", path.display()))?;
}
info!(
@@ -40,8 +40,8 @@ pub(crate) async fn prepare_corpus(ctx: &mut EvaluationContext<'_>) -> anyhow::R
if !config.reseed_slice {
let requested_cases = window.cases.len();
if let Some(manifest) = corpus::load_cached_manifest(&base_dir)? {
if can_reuse_namespace(
if let Some(manifest) = corpus::load_cached_manifest(&base_dir)?
&& can_reuse_namespace(
ctx.db()?,
&manifest,
&embedding_provider,
@@ -51,28 +51,27 @@ pub(crate) async fn prepare_corpus(ctx: &mut EvaluationContext<'_>) -> anyhow::R
requested_cases,
)
.await?
{
info!(
cache = %base_dir.display(),
namespace = ctx.namespace.as_str(),
database = ctx.database.as_str(),
"Namespace already seeded; reusing cached corpus manifest"
);
let corpus_handle = corpus::corpus_handle_from_manifest(manifest, base_dir);
ctx.corpus_handle = Some(corpus_handle);
ctx.expected_fingerprint = Some(expected_fingerprint);
ctx.ingestion_duration_ms = 0;
{
info!(
cache = %base_dir.display(),
namespace = ctx.namespace.as_str(),
database = ctx.database.as_str(),
"Namespace already seeded; reusing cached corpus manifest"
);
let corpus_handle = corpus::corpus_handle_from_manifest(manifest, base_dir);
ctx.corpus_handle = Some(corpus_handle);
ctx.expected_fingerprint = Some(expected_fingerprint);
ctx.ingestion_duration_ms = 0;
let elapsed = started.elapsed();
ctx.record_stage_duration(stage, elapsed);
info!(
evaluation_stage = stage.label(),
duration_ms = elapsed.as_millis(),
"completed evaluation stage"
);
let elapsed = started.elapsed();
ctx.record_stage_duration(stage, elapsed);
info!(
evaluation_stage = stage.label(),
duration_ms = elapsed.as_millis(),
"completed evaluation stage"
);
return Ok(());
}
return Ok(());
}
}
+12 -12
View File
@@ -1,6 +1,6 @@
use std::time::Instant;
use anyhow::{anyhow, Context};
use anyhow::{Context, anyhow};
use tracing::info;
use crate::{
@@ -9,7 +9,7 @@ use crate::{
openai,
settings::{enforce_system_settings, load_or_init_system_settings},
};
use common::utils::embedding::{default_embedding_pool_size, EmbeddingProvider};
use common::utils::embedding::{EmbeddingProvider, default_embedding_pool_size};
use super::super::context::{EvalStage, EvaluationContext};
@@ -65,16 +65,16 @@ pub(crate) async fn prepare_db(ctx: &mut EvaluationContext<'_>) -> anyhow::Resul
let (mut settings, settings_missing) =
load_or_init_system_settings(&db, provider_dimension).await?;
if config.embedding_backend == EmbeddingBackend::FastEmbed {
if let Some(model_code) = embedding_provider.model_code() {
let sanitized = sanitize_model_code(&model_code);
let path = config.cache_dir.join(format!("{sanitized}.json"));
if config.force_convert && path.exists() {
tokio::fs::remove_file(&path)
.await
.with_context(|| format!("removing stale cache {}", path.display()))
.ok();
}
if config.embedding_backend == EmbeddingBackend::FastEmbed
&& let Some(model_code) = embedding_provider.model_code()
{
let sanitized = sanitize_model_code(&model_code);
let path = config.cache_dir.join(format!("{sanitized}.json"));
if config.force_convert && path.exists() {
tokio::fs::remove_file(&path)
.await
.with_context(|| format!("removing stale cache {}", path.display()))
.ok();
}
}
@@ -1,6 +1,6 @@
use std::time::Instant;
use anyhow::{anyhow, Context};
use anyhow::{Context, anyhow};
use common::storage::types::system_settings::SystemSettings;
use tracing::{info, warn};
@@ -1,6 +1,6 @@
use std::{collections::HashSet, sync::Arc, time::Instant};
use anyhow::{anyhow, Context};
use anyhow::{Context, anyhow};
use common::storage::types::StoredObject;
use futures::stream::{self, StreamExt};
use tracing::{debug, info};
@@ -9,8 +9,8 @@ use crate::{
cases::SeededCase,
context_stats,
types::{
adapt_retrieval_output, build_case_diagnostics, text_contains_answer, CaseDiagnostics,
CaseSummary, RetrievedSummary,
CaseDiagnostics, CaseSummary, RetrievedSummary, adapt_retrieval_output,
build_case_diagnostics, text_contains_answer,
},
};
use retrieval_pipeline::{
@@ -391,9 +391,5 @@ fn calculate_ndcg(retrieved: &[RetrievedSummary], k: usize) -> f64 {
idcg += rel / (f64::from(i) + 2.0).log2();
}
if idcg == 0.0 {
0.0
} else {
dcg / idcg
}
if idcg == 0.0 { 0.0 } else { dcg / idcg }
}
+2 -2
View File
@@ -4,8 +4,8 @@ use chrono::Utc;
use tracing::info;
use crate::types::{
build_stage_latency_breakdown, compute_latency_stats, EvaluationSummary, PerformanceTimings,
RetrievedContextStats,
EvaluationSummary, PerformanceTimings, RetrievedContextStats, build_stage_latency_breakdown,
compute_latency_stats,
};
use super::super::context::{EvalStage, EvaluationContext};
+3 -7
View File
@@ -8,8 +8,8 @@ use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use crate::types::{
format_timestamp, CaseSummary, EvaluationStageTimings, EvaluationSummary, LatencyStats,
RetrievalContextStats, StageLatencyBreakdown,
CaseSummary, EvaluationStageTimings, EvaluationSummary, LatencyStats, RetrievalContextStats,
StageLatencyBreakdown, format_timestamp,
};
#[derive(Debug)]
@@ -804,11 +804,7 @@ fn prettify_stage(label: &str) -> String {
}
fn bool_badge(value: bool) -> &'static str {
if value {
""
} else {
""
}
if value { "" } else { "" }
}
fn render_retrieved(entries: &[RetrievedSnippet]) -> String {
+9 -9
View File
@@ -24,15 +24,15 @@ pub(crate) async fn enforce_system_settings(
updated_settings.embedding_dimensions = provider_dimension as u32;
needs_settings_update = true;
}
if let Some(query_override) = config.query_model.as_deref() {
if settings.query_model != query_override {
info!(
model = query_override,
"Overriding system query model for this run"
);
updated_settings.query_model = query_override.to_string();
needs_settings_update = true;
}
if let Some(query_override) = config.query_model.as_deref()
&& settings.query_model != query_override
{
info!(
model = query_override,
"Overriding system query model for this run"
);
updated_settings.query_model = query_override.to_string();
needs_settings_update = true;
}
if needs_settings_update {
settings = SystemSettings::update(db, updated_settings)
+8 -8
View File
@@ -1,12 +1,12 @@
use std::collections::{HashMap, VecDeque};
use anyhow::{anyhow, Result};
use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng};
use anyhow::{Result, anyhow};
use rand::{SeedableRng, rngs::StdRng, seq::SliceRandom};
use tracing::warn;
use crate::datasets::{ConvertedDataset, BEIR_DATASETS};
use crate::datasets::{BEIR_DATASETS, ConvertedDataset};
use super::build::{mix_seed, BuildParams};
use super::build::{BuildParams, mix_seed};
#[allow(clippy::too_many_lines, clippy::arithmetic_side_effects)]
pub(super) fn ordered_question_refs_beir(
@@ -164,10 +164,10 @@ pub(super) fn ordered_question_refs_beir(
pub(super) fn question_prefix(question_id: &str) -> Option<&'static str> {
for prefix in BEIR_DATASETS.iter().map(|kind| kind.source_prefix()) {
if let Some(rest) = question_id.strip_prefix(prefix) {
if rest.starts_with('-') {
return Some(prefix);
}
if let Some(rest) = question_id.strip_prefix(prefix)
&& rest.starts_with('-')
{
return Some(prefix);
}
}
None
+10 -8
View File
@@ -5,9 +5,9 @@ use std::{
path::{Path, PathBuf},
};
use anyhow::{anyhow, Context, Result};
use anyhow::{Context, Result, anyhow};
use chrono::{DateTime, Utc};
use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng};
use rand::{SeedableRng, rngs::StdRng, seq::SliceRandom};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use tracing::{info, warn};
@@ -20,7 +20,7 @@ use crate::{
mod beir;
mod build;
use build::{mix_seed, BuildParams};
use build::{BuildParams, mix_seed};
const SLICE_VERSION: u32 = 2;
pub const DEFAULT_NEGATIVE_MULTIPLIER: f32 = 4.0;
@@ -1116,11 +1116,13 @@ mod tests {
assert_eq!(window.cases.len(), 1);
let positive_ids: Vec<&str> = window.positive_ids().collect();
assert_eq!(positive_ids.len(), 1);
assert!(resolved
.manifest
.paragraphs
.iter()
.any(|entry| entry.id == positive_ids[0]));
assert!(
resolved
.manifest
.paragraphs
.iter()
.any(|entry| entry.id == positive_ids[0])
);
Ok(())
}
+1 -1
View File
@@ -27,7 +27,7 @@
if pkgs.stdenv.isDarwin
then "dylib"
else "so";
minneVersion = "1.0.4";
minneVersion = "1.0.5";
mozjsRelease = "mozjs-sys-v140.10.1-0";
mozjsTarget =
+1 -1
View File
@@ -1,7 +1,7 @@
[package]
name = "html-router"
version = "0.1.0"
edition = "2021"
edition = "2024"
license = "AGPL-3.0-or-later"
[lints]
+1 -1
View File
@@ -6,8 +6,8 @@ use common::{create_template_engine, storage::db::ProvidesDb, utils::config::App
use retrieval_pipeline::reranking::RerankerPool;
use std::collections::HashMap;
use std::sync::{
atomic::{AtomicUsize, Ordering},
Arc,
atomic::{AtomicUsize, Ordering},
};
use std::time::{Duration, Instant};
use tokio::sync::RwLock;
+2 -2
View File
@@ -13,14 +13,14 @@ pub mod router_factory;
pub mod routes;
pub mod utils;
use axum::{extract::FromRef, Router};
use axum::{Router, extract::FromRef};
use axum_session::{Session, SessionStore};
use axum_session_auth::AuthSession;
use axum_session_surreal::SessionSurrealPool;
use common::storage::types::user::User;
use html_state::HtmlState;
use router_factory::RouterFactory;
use surrealdb::{engine::any::Any, Surreal};
use surrealdb::{Surreal, engine::any::Any};
pub type AuthSessionType = AuthSession<User, String, SessionSurrealPool<Any>, Surreal<Any>>;
pub type SessionType = Session<SessionSurrealPool<Any>>;
@@ -2,13 +2,13 @@ use std::collections::HashMap;
use std::sync::Arc;
use axum::{
Extension,
extract::{Request, State},
http::{HeaderName, StatusCode},
middleware::Next,
response::{Html, IntoResponse, Redirect, Response},
Extension,
};
use axum_htmx::{HxRequest, HX_TRIGGER};
use axum_htmx::{HX_TRIGGER, HxRequest};
use common::{
error::AppError,
utils::template_engine::{ProvidesTemplateEngine, Value},
@@ -18,7 +18,7 @@ use serde::Serialize;
use serde_json::json;
use tracing::error;
use crate::{html_state::HtmlState, AuthSessionType};
use crate::{AuthSessionType, html_state::HtmlState};
use common::storage::types::{
conversation::{Conversation, SidebarConversation},
user::{Theme, User},
@@ -175,10 +175,10 @@ const HTMX_HEADERS_TO_FORWARD: &[&str] = &["HX-Push", "HX-Trigger", "HX-Redirect
fn forward_headers(from: &axum::http::HeaderMap, to: &mut axum::http::HeaderMap) {
for &header_name in HTMX_HEADERS_TO_FORWARD {
if let Ok(name) = HeaderName::from_bytes(header_name.as_bytes()) {
if let Some(value) = from.get(&name) {
to.insert(name.clone(), value.clone());
}
if let Ok(name) = HeaderName::from_bytes(header_name.as_bytes())
&& let Some(value) = from.get(&name)
{
to.insert(name.clone(), value.clone());
}
}
}
@@ -219,13 +219,13 @@ where
let mut current_user = None;
{
if let Some(auth) = req.extensions().get::<AuthSessionType>() {
if let Some(user) = &auth.current_user {
is_authenticated = true;
user_theme = user.theme.as_str();
initial_theme = user.theme.initial_theme();
current_user = Some(TemplateUser::from(user));
}
if let Some(auth) = req.extensions().get::<AuthSessionType>()
&& let Some(user) = &auth.current_user
{
is_authenticated = true;
user_theme = user.theme.as_str();
initial_theme = user.theme.initial_theme();
current_user = Some(TemplateUser::from(user));
}
}
+2 -2
View File
@@ -1,9 +1,9 @@
use axum::{extract::FromRef, middleware::from_fn_with_state, Router};
use axum::{Router, extract::FromRef, middleware::from_fn_with_state};
use axum_session::SessionLayer;
use axum_session_auth::{AuthConfig, AuthSessionLayer};
use axum_session_surreal::SessionSurrealPool;
use common::storage::types::user::User;
use surrealdb::{engine::any::Any, Surreal};
use surrealdb::{Surreal, engine::any::Any};
use crate::{
html_state::HtmlState,
+2 -2
View File
@@ -1,13 +1,13 @@
use axum::{extract::State, Form};
use axum::{Form, extract::State};
use chrono_tz::TZ_VARIANTS;
use serde::{Deserialize, Serialize};
use crate::{
AuthSessionType,
middlewares::{
auth_middleware::RequireUser,
response_middleware::{TemplateResponse, TemplateResult},
},
AuthSessionType,
};
use common::storage::types::user::{Theme, User};
+1 -1
View File
@@ -1,8 +1,8 @@
mod handlers;
use axum::{
Router,
extract::FromRef,
routing::{delete, get, patch, post},
Router,
};
use crate::html_state::HtmlState;
+3 -3
View File
@@ -1,7 +1,7 @@
use async_openai::types::models::ListModelResponse;
use axum::{
extract::{Query, State},
Form,
extract::{Query, State},
};
use serde::{Deserialize, Serialize};
@@ -18,8 +18,8 @@ use common::{
utils::{
config::AppConfig,
embedding::{
fastembed_model_dimension, is_valid_fastembed_model_code,
list_fastembed_embedding_models, EmbeddingBackend, FastEmbedModelOption,
EmbeddingBackend, FastEmbedModelOption, fastembed_model_dimension,
is_valid_fastembed_model_code, list_fastembed_embedding_models,
},
},
};
+1 -1
View File
@@ -1,9 +1,9 @@
mod handlers;
use axum::{
Router,
extract::FromRef,
middleware::from_fn,
routing::{get, patch},
Router,
};
use handlers::{
patch_image_prompt, patch_ingestion_prompt, patch_query_prompt, show_admin_panel,
+1 -1
View File
@@ -2,7 +2,7 @@ pub mod signin;
pub mod signout;
pub mod signup;
use axum::{extract::FromRef, routing::get, Router};
use axum::{Router, extract::FromRef, routing::get};
use signin::{authenticate_user, show_signin_form};
use signout::sign_out_user;
use signup::{process_signup_and_show_verification, show_signup_form};
+2 -2
View File
@@ -1,11 +1,11 @@
use axum::{extract::State, Form};
use axum::{Form, extract::State};
use axum_htmx::HxBoosted;
use serde::{Deserialize, Serialize};
use crate::{
AuthSessionType,
html_state::HtmlState,
middlewares::response_middleware::{TemplateResponse, TemplateResult},
AuthSessionType,
};
use common::storage::types::user::User;
+1 -1
View File
@@ -1,6 +1,6 @@
use crate::{
middlewares::response_middleware::{TemplateResponse, TemplateResult},
AuthSessionType,
middlewares::response_middleware::{TemplateResponse, TemplateResult},
};
pub async fn sign_out_user(auth: AuthSessionType) -> TemplateResult {
+2 -2
View File
@@ -1,4 +1,4 @@
use axum::{extract::State, Form};
use axum::{Form, extract::State};
use axum_htmx::HxBoosted;
use serde::{Deserialize, Serialize};
@@ -8,9 +8,9 @@ use common::{
};
use crate::{
AuthSessionType,
html_state::HtmlState,
middlewares::response_middleware::{TemplateResponse, TemplateResult},
AuthSessionType,
};
#[derive(Deserialize, Serialize)]
+3 -3
View File
@@ -1,7 +1,7 @@
use axum::{
Form,
extract::{Path, State},
http::HeaderValue,
Form,
};
use serde::{Deserialize, Serialize};
@@ -18,8 +18,8 @@ use crate::{
middlewares::{
auth_middleware::RequireUser,
response_middleware::{
template_as_response, template_with_headers, ResponseResult, TemplateResponse,
TemplateResult,
ResponseResult, TemplateResponse, TemplateResult, template_as_response,
template_with_headers,
},
},
};
@@ -6,24 +6,24 @@ use async_stream::stream;
use axum::{
extract::{Query, State},
response::{
sse::{Event, KeepAlive, KeepAliveStream},
Sse,
sse::{Event, KeepAlive, KeepAliveStream},
},
};
use futures::{
stream::{self, once},
Stream, StreamExt, TryStreamExt,
stream::{self, once},
};
use json_stream_parser::JsonStreamParser;
use minijinja::Value;
use retrieval_pipeline::answer_retrieval::{
chunks_to_chat_context, create_chat_request, create_user_message_with_history,
LLMResponseFormat,
LLMResponseFormat, chunks_to_chat_context, create_chat_request,
create_user_message_with_history,
};
use serde::{Deserialize, Serialize};
use serde_json::from_str;
use tokio::sync::mpsc::channel;
use tokio::sync::Mutex;
use tokio::sync::mpsc::channel;
use tracing::{debug, error, info};
use common::storage::{
@@ -66,7 +66,7 @@ async fn get_message_and_user(
Ok(None) => {
return Err(sse_with_keep_alive(create_error_stream(
"Message not found: the specified message does not exist",
)))
)));
}
Err(e) => {
error!("Database error retrieving message {}: {:?}", message_id, e);
@@ -233,12 +233,12 @@ pub async fn get_response_stream(
fn build_chat_event_stream(
state: HtmlState,
openai_stream: impl Stream<
Item = Result<
async_openai::types::chat::CreateChatCompletionStreamResponse,
async_openai::error::OpenAIError,
>,
> + Send
+ 'static,
Item = Result<
async_openai::types::chat::CreateChatCompletionStreamResponse,
async_openai::error::OpenAIError,
>,
> + Send
+ 'static,
user_message: &Message,
user_id: String,
allowed_reference_ids: Vec<String>,
@@ -502,17 +502,17 @@ impl StreamParserState {
let json = self.parser.result();
if let Some(obj) = json.as_object() {
if let Some(answer) = obj.get("answer") {
self.in_answer_field = true;
if let Some(obj) = json.as_object()
&& let Some(answer) = obj.get("answer")
{
self.in_answer_field = true;
let current_content = answer.as_str().unwrap_or_default().to_string();
let current_content = answer.as_str().unwrap_or_default().to_string();
if current_content.len() > self.last_answer_content.len() {
let new_content = current_content[self.last_answer_content.len()..].to_string();
self.last_answer_content = current_content;
return new_content;
}
if current_content.len() > self.last_answer_content.len() {
let new_content = current_content[self.last_answer_content.len()..].to_string();
self.last_answer_content = current_content;
return new_content;
}
}
+1 -1
View File
@@ -3,7 +3,7 @@ mod message_response_stream;
mod reference_validation;
mod references;
use axum::{extract::FromRef, routing::get, Router};
use axum::{Router, extract::FromRef, routing::get};
pub use chat_handlers::{
delete_conversation, new_chat_user_message, new_user_message, patch_conversation_title,
reload_sidebar, show_chat_base as show_base, show_conversation_editing_title,
@@ -6,7 +6,7 @@ use common::{
error::AppError,
storage::{
db::SurrealDbClient,
types::{knowledge_entity::KnowledgeEntity, text_chunk::TextChunk, StoredObject},
types::{StoredObject, knowledge_entity::KnowledgeEntity, text_chunk::TextChunk},
},
};
use retrieval_pipeline::RetrievalOutput;
@@ -448,10 +448,12 @@ mod tests {
assert_eq!(result.valid_refs, vec![first.id, second.id]);
assert_eq!(result.invalid_refs.len(), 2);
assert!(result
.invalid_refs
.iter()
.all(|entry| entry.reason == InvalidReferenceReason::Duplicate));
assert!(
result
.invalid_refs
.iter()
.all(|entry| entry.reason == InvalidReferenceReason::Duplicate)
);
}
#[tokio::test]
+1 -1
View File
@@ -17,7 +17,7 @@ use crate::{
},
};
use super::reference_validation::{normalize_reference, ReferenceLookupTarget};
use super::reference_validation::{ReferenceLookupTarget, normalize_reference};
#[derive(Serialize)]
struct ReferenceTooltipData {
+2 -2
View File
@@ -1,6 +1,6 @@
use axum::{
extract::{Path, Query, State},
Form,
extract::{Path, Query, State},
};
use axum_htmx::{HxBoosted, HxRequest, HxTarget};
use serde::{Deserialize, Serialize};
@@ -13,7 +13,7 @@ use crate::{
auth_middleware::RequireUser,
response_middleware::{TemplateResponse, TemplateResult},
},
utils::pagination::{paginate_items, Pagination},
utils::pagination::{Pagination, paginate_items},
utils::text_content_preview::truncate_text_contents,
};
use url::form_urlencoded;
+1 -1
View File
@@ -1,6 +1,6 @@
mod handlers;
use axum::{extract::FromRef, routing::get, Router};
use axum::{Router, extract::FromRef, routing::get};
use handlers::{
delete_text_content, patch_text_content, show_content_page, show_content_read_modal,
show_recent_content, show_text_content_edit_form,
+2 -2
View File
@@ -1,7 +1,7 @@
use axum::{
body::Body,
extract::{Path, State},
http::{header, HeaderMap, HeaderValue, StatusCode},
http::{HeaderMap, HeaderValue, StatusCode, header},
response::IntoResponse,
};
use chrono::{DateTime, Utc};
@@ -13,7 +13,7 @@ use crate::{
middlewares::{
auth_middleware::RequireUser,
response_middleware::{
template_as_response, ResponseResult, TemplateResponse, TemplateResult,
ResponseResult, TemplateResponse, TemplateResult, template_as_response,
},
},
utils::text_content_preview::truncate_text_contents,
+1 -1
View File
@@ -1,9 +1,9 @@
pub mod handlers;
use axum::{
Router,
extract::FromRef,
routing::{delete, get},
Router,
};
use handlers::{
delete_job, delete_text_content, index_handler, serve_file, show_active_jobs, show_task_archive,
+3 -3
View File
@@ -4,12 +4,12 @@ use axum::{
extract::{Query, State},
http::StatusCode,
response::{
sse::{Event, KeepAlive, KeepAliveStream},
Sse,
sse::{Event, KeepAlive, KeepAliveStream},
},
};
use axum_typed_multipart::{FieldData, TryFromMultipart, TypedMultipart};
use futures::{future::try_join_all, stream, Stream, StreamExt, TryFutureExt};
use futures::{Stream, StreamExt, TryFutureExt, future::try_join_all, stream};
use minijinja::context;
use serde::{Deserialize, Serialize};
use tempfile::NamedTempFile;
@@ -24,7 +24,7 @@ use common::{
ingestion_task::{IngestionTask, TaskState},
user::User,
},
utils::ingest_limits::{validate_ingest_input, IngestValidationError},
utils::ingest_limits::{IngestValidationError, validate_ingest_input},
};
use crate::{
+1 -1
View File
@@ -1,6 +1,6 @@
mod handlers;
use axum::{extract::DefaultBodyLimit, extract::FromRef, routing::get, Router};
use axum::{Router, extract::DefaultBodyLimit, extract::FromRef, routing::get};
use handlers::{get_task_updates_stream, hide_ingest_form, process_ingest_form, show_ingest_form};
use crate::html_state::HtmlState;
+11 -12
View File
@@ -3,15 +3,15 @@ use std::collections::{HashMap, HashSet};
use std::fmt;
use axum::{
Form, Json,
extract::{Path, Query, State},
http::HeaderValue,
response::{IntoResponse, Response},
Form, Json,
};
use axum_htmx::{HxBoosted, HxRequest, HX_TRIGGER};
use axum_htmx::{HX_TRIGGER, HxBoosted, HxRequest};
use serde::{
de::{self, Deserializer, MapAccess, Visitor},
Deserialize, Serialize,
de::{self, Deserializer, MapAccess, Visitor},
};
use common::{
@@ -27,7 +27,7 @@ use common::{
utils::embedding::EmbeddingProvider,
};
use retrieval_pipeline::{
normalize_fts_terms, reciprocal_rank_fusion, RetrievalTuning, RrfConfig, Scored,
RetrievalTuning, RrfConfig, Scored, normalize_fts_terms, reciprocal_rank_fusion,
};
use tracing::debug;
use uuid::Uuid;
@@ -37,10 +37,10 @@ use crate::{
middlewares::{
auth_middleware::RequireUser,
response_middleware::{
template_with_headers, ResponseResult, TemplateResponse, TemplateResult,
ResponseResult, TemplateResponse, TemplateResult, template_with_headers,
},
},
utils::pagination::{paginate_items, paginate_slice, Pagination},
utils::pagination::{Pagination, paginate_items, paginate_slice},
};
use url::form_urlencoded;
@@ -950,12 +950,11 @@ fn normalize_filter(input: Option<String>) -> Option<String> {
fn trim_matching_quotes(value: &str) -> &str {
let bytes = value.as_bytes();
if let (Some(&first), Some(&last)) = (bytes.first(), bytes.last()) {
if bytes.len() >= 2
&& ((first == b'"' && last == b'"') || (first == b'\'' && last == b'\''))
{
return &value[1..value.len().saturating_sub(1)];
}
if let (Some(&first), Some(&last)) = (bytes.first(), bytes.last())
&& bytes.len() >= 2
&& ((first == b'"' && last == b'"') || (first == b'\'' && last == b'\''))
{
return &value[1..value.len().saturating_sub(1)];
}
value
}
+1 -1
View File
@@ -1,9 +1,9 @@
mod handlers;
use axum::{
Router,
extract::FromRef,
routing::{delete, get, post},
Router,
};
use handlers::{
create_knowledge_entity, delete_knowledge_entity, delete_knowledge_relationship,
@@ -1,10 +1,10 @@
use axum::{
Form,
extract::{Path, Query, State},
http::{HeaderValue, StatusCode},
response::{IntoResponse, Response},
Form,
};
use axum_htmx::{HxBoosted, HxRequest, HX_TRIGGER};
use axum_htmx::{HX_TRIGGER, HxBoosted, HxRequest};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
@@ -12,7 +12,7 @@ use crate::html_state::HtmlState;
use crate::middlewares::{
auth_middleware::RequireUser,
response_middleware::{
template_with_headers, ResponseResult, TemplateResponse, TemplateResult,
ResponseResult, TemplateResponse, TemplateResult, template_with_headers,
},
};
use common::storage::types::{
+1 -1
View File
@@ -1,8 +1,8 @@
mod handlers;
use axum::{
Router,
extract::FromRef,
routing::{delete, get, patch, post},
Router,
};
use crate::html_state::HtmlState;
+2 -2
View File
@@ -4,9 +4,9 @@ use axum::extract::{Query, State};
use axum_htmx::{HxBoosted, HxRequest};
use common::storage::types::{text_content::TextContent, user::User};
use retrieval_pipeline::{
retrieve, RetrievalConfig, RetrievalOutput, RetrievedChunk, RetrievedEntity,
RetrievalConfig, RetrievalOutput, RetrievedChunk, RetrievedEntity, retrieve,
};
use serde::{de, Deserialize, Deserializer, Serialize};
use serde::{Deserialize, Deserializer, Serialize, de};
use std::{fmt, str::FromStr};
use crate::{
+2 -2
View File
@@ -1,8 +1,8 @@
mod handlers;
use axum::{extract::FromRef, routing::get, Router};
use axum::{Router, extract::FromRef, routing::get};
#[allow(clippy::module_name_repetitions)]
pub use handlers::{search_result_handler as result_handler, SearchParams as SearchQueryParams};
pub use handlers::{SearchParams as SearchQueryParams, search_result_handler as result_handler};
use crate::html_state::HtmlState;
+3 -3
View File
@@ -3,10 +3,10 @@
use std::sync::Arc;
use axum::{
body::{to_bytes, Body},
http::{header, Request, StatusCode},
response::Response,
Router,
body::{Body, to_bytes},
http::{Request, StatusCode, header},
response::Response,
};
use common::{
storage::{db::SurrealDbClient, store::StorageManager, types::user::User},
+1 -1
View File
@@ -9,7 +9,7 @@
use std::fs;
use std::path::{Path, PathBuf};
use minijinja::{path_loader, Environment};
use minijinja::{Environment, path_loader};
fn templates_dir() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("templates")
+1 -1
View File
@@ -1,7 +1,7 @@
[package]
name = "ingestion-pipeline"
version = "0.1.0"
edition = "2021"
edition = "2024"
license = "AGPL-3.0-or-later"
[lints]
+4 -4
View File
@@ -7,14 +7,14 @@ use chrono::Utc;
use common::storage::{
db::SurrealDbClient,
indexes::maybe_run_scheduled_index_rebuild,
types::ingestion_task::{IngestionTask, DEFAULT_LEASE_SECS},
types::ingestion_task::{DEFAULT_LEASE_SECS, IngestionTask},
};
pub use pipeline::{
persist_artifacts, EmbeddedKnowledgeEntity, EmbeddedTextChunk, IngestionConfig,
IngestionPipeline, IngestionTuning, PipelineArtifacts,
EmbeddedKnowledgeEntity, EmbeddedTextChunk, IngestionConfig, IngestionPipeline,
IngestionTuning, PipelineArtifacts, persist_artifacts,
};
use std::sync::Arc;
use tokio::time::{sleep, Duration};
use tokio::time::{Duration, sleep};
use tracing::{error, info, warn};
use uuid::Uuid;
+3 -3
View File
@@ -42,7 +42,7 @@ use tracing::{debug, info, warn};
use self::{
context::PipelineContext,
stages::{enrich, persist, prepare_content, retrieve_related},
state::{ready, Enriched, IngestionMachine},
state::{Enriched, IngestionMachine, ready},
};
/// Wall-clock duration of each pre-persistence pipeline stage.
@@ -355,10 +355,10 @@ mod finalize_tests {
use tokio::time::sleep;
use super::{
IngestionPipeline, PipelineServices,
config::IngestionTuning,
test_support::setup_db,
tests::{pipeline_config, reserve_task, MockServices},
IngestionPipeline, PipelineServices,
tests::{MockServices, pipeline_config, reserve_task},
};
#[tokio::test]
@@ -12,14 +12,13 @@ use common::{
storage::{
db::SurrealDbClient,
types::{
knowledge_entity::KnowledgeEntity,
EmbeddingRecord, StoredObject, knowledge_entity::KnowledgeEntity,
knowledge_entity_embedding::KnowledgeEntityEmbedding, text_chunk::TextChunk,
text_chunk_embedding::TextChunkEmbedding, text_content::TextContent, EmbeddingRecord,
StoredObject,
text_chunk_embedding::TextChunkEmbedding, text_content::TextContent,
},
},
};
use tokio::time::{sleep, Duration};
use tokio::time::{Duration, sleep};
use tracing::warn;
use super::{
@@ -268,8 +267,8 @@ mod tests {
use super::*;
use crate::pipeline::test_support::{
self, count_chunks_for_source, count_entities_for_source, count_relationships_for_source,
large_artifacts, persist, sample_artifacts, setup_db, TEST_EMBEDDING_DIM,
self, TEST_EMBEDDING_DIM, count_chunks_for_source, count_entities_for_source,
count_relationships_for_source, large_artifacts, persist, sample_artifacts, setup_db,
};
#[tokio::test]
+5 -5
View File
@@ -15,14 +15,14 @@ use common::{
db::SurrealDbClient,
store::StorageManager,
types::{
ingestion_payload::IngestionPayload, knowledge_relationship::KnowledgeRelationship,
system_settings::SystemSettings, text_chunk::TextChunk, text_content::TextContent,
StoredObject,
StoredObject, ingestion_payload::IngestionPayload,
knowledge_relationship::KnowledgeRelationship, system_settings::SystemSettings,
text_chunk::TextChunk, text_content::TextContent,
},
},
utils::{config::AppConfig, embedding::EmbeddingProvider},
};
use retrieval_pipeline::{reranking::RerankerPool, retrieved_entities_to_json, RetrievedEntity};
use retrieval_pipeline::{RetrievedEntity, reranking::RerankerPool, retrieved_entities_to_json};
use text_splitter::{ChunkCapacity, ChunkConfig, TextSplitter};
use super::{enrichment_result::LLMEnrichmentResult, preparation::to_text_content};
@@ -358,7 +358,7 @@ mod tests {
use std::sync::Arc;
use anyhow::Context;
use async_openai::{config::OpenAIConfig, types::chat::ChatCompletionRequestMessage, Client};
use async_openai::{Client, config::OpenAIConfig, types::chat::ChatCompletionRequestMessage};
use common::{
storage::{
db::SurrealDbClient, store::StorageManager, types::system_settings::SystemSettingsPatch,
+6 -4
View File
@@ -1,6 +1,7 @@
use std::sync::Arc;
use super::{
IngestionPipeline,
config::{IngestionConfig, IngestionTuning},
enrichment_result::LLMEnrichmentResult,
services::PipelineServices,
@@ -8,7 +9,6 @@ use super::{
count_chunks_for_source, count_entities_for_source, count_relationships_for_source,
persist, sample_artifacts, setup_db,
},
IngestionPipeline,
};
use crate::pipeline::context::{EmbeddedKnowledgeEntity, EmbeddedTextChunk};
use anyhow::{self, Context};
@@ -405,9 +405,11 @@ async fn ingestion_pipeline_happy_path_persists_artifacts() -> anyhow::Result<()
call_log.get(0..4),
Some(&["prepare", "retrieve", "enrich", "convert"][..])
);
assert!(call_log
.get(4..)
.is_some_and(|tail| tail.iter().all(|entry| *entry == "chunk")));
assert!(
call_log
.get(4..)
.is_some_and(|tail| tail.iter().all(|entry| *entry == "chunk"))
);
Ok(())
}
@@ -38,11 +38,11 @@ async fn materialize_temp_file(
let mut path = env::temp_dir();
let mut file_name = format!("minne-ingest-{}", Uuid::new_v4());
if let Some(ext) = extension {
if !ext.is_empty() {
file_name.push('.');
file_name.push_str(ext);
}
if let Some(ext) = extension
&& !ext.is_empty()
{
file_name.push('.');
file_name.push_str(ext);
}
path.push(file_name);
@@ -142,7 +142,7 @@ pub async fn extract_text_from_file(
#[cfg(test)]
mod tests {
use super::*;
use async_openai::{config::OpenAIConfig, Client};
use async_openai::{Client, config::OpenAIConfig};
use bytes::Bytes;
use chrono::Utc;
use common::{
@@ -3,7 +3,7 @@ use async_openai::types::chat::{
ChatCompletionRequestMessageContentPartTextArgs, ChatCompletionRequestUserMessageArgs,
CreateChatCompletionRequestArgs, ImageDetail, ImageUrlArgs,
};
use base64::{engine::general_purpose::STANDARD, Engine as _};
use base64::{Engine as _, engine::general_purpose::STANDARD};
use common::{
error::AppError,
storage::{db::SurrealDbClient, types::system_settings::SystemSettings},
+11 -4
View File
@@ -148,20 +148,27 @@ async fn maybe_dump_debug_image(page_index: u32, bytes: &[u8]) -> Result<(), App
mod tests {
use super::*;
use anyhow::{self};
use lopdf::dictionary;
use lopdf::Object;
use lopdf::dictionary;
#[test]
fn test_debug_dump_directory_env_var() -> anyhow::Result<()> {
std::env::remove_var(DEBUG_IMAGE_ENV_VAR);
// SAFETY: test runs serially; env is restored before return.
unsafe {
std::env::remove_var(DEBUG_IMAGE_ENV_VAR);
}
assert!(debug_dump_directory().is_none());
std::env::set_var(DEBUG_IMAGE_ENV_VAR, "/tmp/minne_pdf_debug");
unsafe {
std::env::set_var(DEBUG_IMAGE_ENV_VAR, "/tmp/minne_pdf_debug");
}
let dir =
debug_dump_directory().ok_or_else(|| anyhow::anyhow!("expected debug directory"))?;
assert_eq!(dir, PathBuf::from("/tmp/minne_pdf_debug"));
std::env::remove_var(DEBUG_IMAGE_ENV_VAR);
unsafe {
std::env::remove_var(DEBUG_IMAGE_ENV_VAR);
}
Ok(())
}

Some files were not shown because too many files have changed in this diff Show More