diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4534473..82bbf32 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -77,6 +77,7 @@ jobs: submodules: recursive - name: Load ONNX Runtime version + shell: bash run: echo "ORT_VER=$(tr -d '[:space:]' < ort-version)" >> "$GITHUB_ENV" - name: Install Rust non-interactively if not already installed diff --git a/CHANGELOG.md b/CHANGELOG.md index 7415097..2aafdff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,14 @@ # Changelog ## Unreleased -- Search results are now selectable by which type, knowledge entity or ingested content -- Now possible to choose the local embedding model via the admin ui -- Admin embedding changes are saved immediately. Needs restart to re-embed. This simplifies the flow, may be subject to improvement in the future + +## 1.0.3 (2026-06-12) +- Search: filter results by type — knowledge entities, ingested content, or both +- Admin: choose the local FastEmbed model from the admin UI; changes save immediately and apply after restart (re-embeds when the vector dimension changes) +- Performance: pooled FastEmbed workers and batched embedding generation for faster ingestion and search +- Performance: lower search and chat latency from backend allocation and retrieval optimizations +- Fix: modal dialogs (scratchpad editor, admin prompts, entity creation) open and close more reliably +- Fix: improved knowledge-entity relationship suggestions when creating entities manually +- Fix: API key revocation now correctly clears the stored key ## 1.0.2 (2026-02-15) - Fix: edge case where navigation back to a chat page could trigger a new response generation diff --git a/Cargo.lock b/Cargo.lock index 003d45d..5c7a3ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5100,9 +5100,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.13" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ "bytes", "getrandom 0.3.4", @@ -5787,9 +5787,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.9" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ "ring", "rustls-pki-types", @@ -6336,9 +6336,9 @@ dependencies = [ [[package]] name = "state-machines" -version = "0.2.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "806ba0bf43ae158b229036d8a84601649a58d9761e718b5e0e07c2953803f4c1" +checksum = "e6a3c439e93b084079d81f1ccec41c64edf5a7348484db5228344372e634b92f" dependencies = [ "state-machines-core", "state-machines-macro", @@ -6346,19 +6346,18 @@ dependencies = [ [[package]] name = "state-machines-core" -version = "0.2.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "949cc50e84bed6234117f28a0ba2980dc35e9c17984ffe4e0a3364fba3e77540" +checksum = "b53079921cf97a990334cd0296c1efa4f16631ed3f30a3010bb2f2d5c76cb37b" [[package]] name = "state-machines-macro" -version = "0.2.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8322f5aa92d31b3c05faa1ec3231b82da479a20706836867d67ae89ce74927bd" +checksum = "c7158fc1607004ff2bfba01ef8ca59d0446c374cd52a25a8726bba0cbb0d5c74" dependencies = [ "proc-macro2", "quote", - "state-machines-core", "syn 2.0.115", ] diff --git a/Cargo.toml b/Cargo.toml index 6d4c603..e5be857 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,7 +58,7 @@ tokio-retry = "0.3.0" base64 = "0.22.1" object_store = { version = "0.11.2", features = ["aws"] } bytes = "1.7.1" -state-machines = "0.2.0" +state-machines = "0.9" pdf-extract 
= "0.9" lopdf = "0.32" fastembed = { version = "5.2.0", default-features = false, features = ["hf-hub-native-tls", "ort-load-dynamic"] } @@ -67,6 +67,9 @@ fastembed = { version = "5.2.0", default-features = false, features = ["hf-hub-n inherits = "release" lto = "thin" +[workspace.lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ["cfg(feature, values(\"inspect\"))"] } + [workspace.lints.clippy] # Performance-focused lints perf = { level = "warn", priority = -1 } diff --git a/Dockerfile b/Dockerfile index 1173b87..a4473e3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # === Builder === -FROM rust:1.89-bookworm AS builder +FROM rust:1.91.1-bookworm AS builder WORKDIR /usr/src/minne RUN apt-get update && apt-get install -y --no-install-recommends \ pkg-config clang cmake git && rm -rf /var/lib/apt/lists/* diff --git a/common/src/storage/indexes.rs b/common/src/storage/indexes.rs index 35de433..33bd90b 100644 --- a/common/src/storage/indexes.rs +++ b/common/src/storage/indexes.rs @@ -9,7 +9,7 @@ use tracing::{debug, info, warn}; use crate::{error::AppError, storage::db::SurrealDbClient}; const INDEX_POLL_INTERVAL: Duration = Duration::from_millis(50); -const INDEX_BUILD_TIMEOUT: Duration = Duration::from_mins(30); +const INDEX_BUILD_TIMEOUT: Duration = Duration::from_secs(30 * 60); const FTS_ANALYZER_NAME: &str = "app_en_fts_analyzer"; /// HNSW index options used by runtime index creation (includes CONCURRENTLY). 
diff --git a/common/src/storage/types/ingestion_task.rs b/common/src/storage/types/ingestion_task.rs index 4c05cba..58abdbc 100644 --- a/common/src/storage/types/ingestion_task.rs +++ b/common/src/storage/types/ingestion_task.rs @@ -750,7 +750,7 @@ mod tests { let worker_id = "worker-1"; let now = chrono::Utc::now(); - let claimed = IngestionTask::claim_next_ready(&db, worker_id, now, Duration::from_mins(1)) + let claimed = IngestionTask::claim_next_ready(&db, worker_id, now, Duration::from_secs(60)) .await .with_context(|| "claim".to_string())? .with_context(|| "task claimed".to_string())?; @@ -786,7 +786,7 @@ mod tests { let worker_id = "worker-dead"; let now = chrono::Utc::now(); - let claimed = IngestionTask::claim_next_ready(&db, worker_id, now, Duration::from_mins(1)) + let claimed = IngestionTask::claim_next_ready(&db, worker_id, now, Duration::from_secs(60)) .await .with_context(|| "claim".to_string())? .with_context(|| "claimed".to_string())?; diff --git a/common/src/utils/template_engine.rs b/common/src/utils/template_engine.rs index 0082312..54d33a1 100644 --- a/common/src/utils/template_engine.rs +++ b/common/src/utils/template_engine.rs @@ -19,6 +19,7 @@ pub enum TemplateEngine { Embedded(Arc>), } +#[allow(clippy::module_name_repetitions)] #[macro_export] macro_rules! 
create_template_engine { // Single path argument diff --git a/devenv.nix b/devenv.nix index ca89950..b5b49bf 100644 --- a/devenv.nix +++ b/devenv.nix @@ -7,10 +7,12 @@ }: let ortVersion = lib.removeSuffix "\n" (builtins.readFile "${toString ./.}/ort-version"); -in -lib.assertMsg (pkgs.onnxruntime.version == ortVersion) - "pkgs.onnxruntime.version (${pkgs.onnxruntime.version}) must match ort-version (${ortVersion})" -{ + _ortVersionCheck = + if pkgs.onnxruntime.version == ortVersion + then null + else + throw "pkgs.onnxruntime.version (${pkgs.onnxruntime.version}) must match ort-version (${ortVersion})"; +in builtins.seq _ortVersionCheck { cachix.enable = false; packages = [ @@ -28,8 +30,9 @@ lib.assertMsg (pkgs.onnxruntime.version == ortVersion) languages.rust = { enable = true; + channel = "stable"; + version = "1.91.1"; components = ["rustc" "clippy" "rustfmt" "cargo" "rust-analyzer"]; - channel = "nightly"; targets = ["x86_64-unknown-linux-gnu" "x86_64-pc-windows-msvc"]; mold.enable = true; }; diff --git a/evaluations/src/datasets/beir.rs b/evaluations/src/datasets/beir.rs index 13a5970..a06a529 100644 --- a/evaluations/src/datasets/beir.rs +++ b/evaluations/src/datasets/beir.rs @@ -160,23 +160,23 @@ fn load_corpus(path: &Path) -> Result> { if raw.trim().is_empty() { continue; } - let row: BeirCorpusRow = serde_json::from_str(&raw).with_context(|| { + let corpus_row: BeirCorpusRow = serde_json::from_str(&raw).with_context(|| { format!( "parsing corpus JSON on line {} from {}", idx + 1, path.display() ) })?; - let title = row.title.unwrap_or_else(|| row.id.clone()); - let text = row.text.unwrap_or_default(); + let title = corpus_row.title.unwrap_or_else(|| corpus_row.id.clone()); + let text = corpus_row.text.unwrap_or_default(); let context = build_context(&title, &text); if context.is_empty() { - warn!(doc_id = %row.id, "Skipping empty corpus document"); + warn!(doc_id = %corpus_row.id, "Skipping empty corpus document"); continue; } - corpus.insert(row.id, BeirParagraph { title, context }); 
+ corpus.insert(corpus_row.id, BeirParagraph { title, context }); } Ok(corpus) @@ -195,7 +195,7 @@ fn load_queries(path: &Path) -> Result> { if raw.trim().is_empty() { continue; } - let row: BeirQueryRow = serde_json::from_str(&raw).with_context(|| { + let query_row: BeirQueryRow = serde_json::from_str(&raw).with_context(|| { format!( "parsing query JSON on line {} from {}", idx + 1, @@ -203,9 +203,9 @@ fn load_queries(path: &Path) -> Result> { ) })?; queries.insert( - row.id, + query_row.id, BeirQuery { - text: row.text.trim().to_string(), + text: query_row.text.trim().to_string(), }, ); } diff --git a/flake.nix b/flake.nix index cede23e..aaf5b6a 100644 --- a/flake.nix +++ b/flake.nix @@ -46,6 +46,7 @@ pname = "minne"; version = "1.0.3"; + # Uses nixpkgs rustc (stable). Release/Docker pin: rust-toolchain.toml (1.91.1). doCheck = false; nativeBuildInputs = [pkgs.pkg-config pkgs.rustfmt pkgs.makeWrapper]; diff --git a/html-router/src/routes/ingestion/handlers.rs b/html-router/src/routes/ingestion/handlers.rs index cefc10a..893df92 100644 --- a/html-router/src/routes/ingestion/handlers.rs +++ b/html-router/src/routes/ingestion/handlers.rs @@ -51,6 +51,7 @@ pub async fn show_ingest_form( RequireUser(user): RequireUser, ) -> TemplateResult { #[derive(Serialize)] + #[allow(clippy::module_name_repetitions)] pub struct ShowIngestFormData { user_categories: Vec, } diff --git a/ingestion-pipeline/src/pipeline/tests.rs b/ingestion-pipeline/src/pipeline/tests.rs index 0a6ea8f..61e0c1a 100644 --- a/ingestion-pipeline/src/pipeline/tests.rs +++ b/ingestion-pipeline/src/pipeline/tests.rs @@ -301,7 +301,6 @@ async fn reserve_task( } #[tokio::test] -#[allow(clippy::duration_suboptimal_units)] // assertions mirror retry_delay's seconds-based config async fn retry_delay_grows_exponentially_and_caps() -> anyhow::Result<()> { use std::time::Duration; diff --git a/json-stream-parser/src/lib.rs b/json-stream-parser/src/lib.rs index d2afdd6..d8a35da 100644 --- 
a/json-stream-parser/src/lib.rs +++ b/json-stream-parser/src/lib.rs @@ -218,9 +218,8 @@ fn add_char_into_object( } } } - (&Value::Bool(true), &ObjectStatus::Scalar { .. }, 'e') - | (&Value::Bool(false), &ObjectStatus::Scalar { .. }, 'e') - | (&Value::Object(_), &ObjectStatus::ValueQuoteClose, '}') => { + (&Value::Bool(true) | &Value::Bool(false), &ObjectStatus::Scalar { .. }, 'e') +| (&Value::Object(_), &ObjectStatus::ValueQuoteClose, '}') => { *current_status = ObjectStatus::Closed; } diff --git a/main/Cargo.toml b/main/Cargo.toml index 33a49f0..78f6bb5 100644 --- a/main/Cargo.toml +++ b/main/Cargo.toml @@ -2,6 +2,7 @@ name = "main" version = "1.0.3" edition = "2021" +rust-version = "1.91" repository = "https://github.com/perstarkse/minne" license = "AGPL-3.0-or-later" diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..760f4ff --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,5 @@ +[toolchain] +channel = "1.91.1" +components = ["rustfmt", "clippy"] +profile = "default" +targets = ["x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"]