diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 245604d..0000000 --- a/.dockerignore +++ /dev/null @@ -1,40 +0,0 @@ -# Git stuff -.git/ -.gitignore -.github - -# Node build artifacts -**/node_modules/ - -# Nix/Devenv environment files -.direnv/ -.devenv/ -devenv.lock -devenv.nix -devenv.yaml -docker-compose.yml -.envrc -.devenv.flake.nix -flake.lock -flake.nix - -# Rust build artifacts (crucial for multi-stage builds) -**/target/ - -# Runtime data directories -data/ -database/ - -# Local environment config (sensitive) -.env - -# IDE specific -.vscode/ -.idea/ - -# OS specific -.DS_Store -Thumbs.db - -# Logs / Temporary files -*.log diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..e9ace9f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,42 @@ +name: CI +permissions: + contents: read + id-token: write + actions: write + +on: + pull_request: + push: + branches: [main] + +jobs: + checks: + name: Nix checks + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + actions: write + + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install Nix + uses: DeterminateSystems/determinate-nix-action@main + + - name: Set up Nix store cache + uses: nix-community/cache-nix-action@v6 + with: + primary-key: nix-${{ runner.os }}-${{ hashFiles('**/*.nix', '**/flake.lock', 'Cargo.lock') }} + restore-prefixes-first-match: nix-${{ runner.os }}- + gc-max-store-size-linux: 10G + purge: true + purge-prefixes: nix-${{ runner.os }}- + purge-created: 14 + purge-last-access: 7 + purge-primary-key: never + + - name: Run all flake checks + run: nix flake check -L --show-trace diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 82bbf32..07041ef 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,7 +7,7 @@ on: pull_request: push: tags: - - '**[0-9]+.[0-9]+.[0-9]+*' + - "**[0-9]+.[0-9]+.[0-9]+*" jobs: 
plan: @@ -17,6 +17,7 @@ jobs: tag: ${{ !github.event.pull_request && github.ref_name || '' }} tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }} publishing: ${{ !github.event.pull_request }} + ort-version: ${{ steps.ort_version.outputs.value }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: @@ -25,11 +26,27 @@ jobs: submodules: recursive - name: Install Nix - uses: cachix/install-nix-action@v27 + uses: DeterminateSystems/determinate-nix-action@main with: - extra_nix_config: | + extra-conf: | experimental-features = nix-command flakes + - name: Set up Nix store cache + uses: nix-community/cache-nix-action@v6 + with: + primary-key: nix-${{ runner.os }}-${{ hashFiles('**/*.nix', '**/flake.lock', 'Cargo.lock') }} + restore-prefixes-first-match: nix-${{ runner.os }}- + gc-max-store-size-linux: 10G + purge: true + purge-prefixes: nix-${{ runner.os }}- + purge-created: 14 + purge-last-access: 7 + purge-primary-key: never + + - name: Read ORT version from flake + id: ort_version + run: echo "value=$(nix eval .#lib.ortVersion --raw)" >> "$GITHUB_OUTPUT" + - name: Verify ort-version matches nixpkgs onnxruntime run: nix flake check --system x86_64-linux -L @@ -78,7 +95,7 @@ jobs: - name: Load ONNX Runtime version shell: bash - run: echo "ORT_VER=$(tr -d '[:space:]' < ort-version)" >> "$GITHUB_ENV" + run: echo "ORT_VER=${{ needs.plan.outputs.ort-version }}" >> "$GITHUB_ENV" - name: Install Rust non-interactively if not already installed if: ${{ matrix.container }} @@ -108,7 +125,7 @@ jobs: run: | mkdir -p lib rm -f lib/* - + # Windows PowerShell - name: Prepare lib dir (Windows) if: runner.os == 'Windows' @@ -158,7 +175,6 @@ jobs: echo "lib/ contents:" ls -l lib || dir lib # ===== END: Injected ORT staging ===== - - name: Install dependencies run: | ${{ matrix.packages_install }} @@ -186,21 +202,37 @@ jobs: ${{ env.BUILD_MANIFEST_NAME }} build_and_push_docker_image: - name: Build and Push Docker Image + name: Build and Push Docker 
Image (Nix) runs-on: ubuntu-latest needs: [plan] if: ${{ needs.plan.outputs.publishing == 'true' }} permissions: contents: read + id-token: write packages: write + actions: write steps: - - name: Checkout repository - uses: actions/checkout@v4 + - uses: actions/checkout@v4 with: submodules: recursive - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + - name: Install Nix + uses: DeterminateSystems/determinate-nix-action@main + + - name: Set up Nix store cache + uses: nix-community/cache-nix-action@v6 + with: + primary-key: nix-${{ runner.os }}-${{ hashFiles('**/*.nix', '**/flake.lock', 'Cargo.lock') }} + restore-prefixes-first-match: nix-${{ runner.os }}- + gc-max-store-size-linux: 10G + purge: true + purge-prefixes: nix-${{ runner.os }}- + purge-created: 14 + purge-last-access: 7 + purge-primary-key: never + + - name: Build Docker image with Nix + run: nix build .#dockerImage -L --show-trace - name: Log in to GitHub Container Registry uses: docker/login-action@v3 @@ -215,15 +247,16 @@ jobs: with: images: ghcr.io/${{ github.repository }} - - name: Build and push Docker image - uses: docker/build-push-action@v5 - with: - context: . - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max + - name: Load and push Docker image + env: + IMAGE_NAME: ghcr.io/${{ github.repository }} + IMAGE_TAG: ${{ needs.plan.outputs.tag }} + run: | + loaded_image="$(docker load < result | sed -n 's/^Loaded image: //p')" + docker tag "$loaded_image" "$IMAGE_NAME:$IMAGE_TAG" + docker tag "$loaded_image" "$IMAGE_NAME:latest" + docker push "$IMAGE_NAME:$IMAGE_TAG" + docker push "$IMAGE_NAME:latest" build-global-artifacts: needs: [plan, build-local-artifacts] diff --git a/CHANGELOG.md b/CHANGELOG.md index 429e6f6..ca4fb96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased +- Docker-compose: The example now references the ghcr image so that we can remove the Dockerfile and reduce maintenance scope. 
- Refactor: web scraping now uses `servo-fetch` (pure-Rust Servo engine) and PDF rendering uses `pdfium-render` (direct PDFium bindings) — reduces Docker image size by ~300MB, improves startup latency by ~100× for PDF rendering, and provides more stable output - Fix: added `pkgs.libglvnd` to `LD_LIBRARY_PATH` in devenv so Servo engine can find `libEGL.so` at runtime - Fix: updated Dockerfile to add `libegl1 libegl-mesa0 libgles2 libfontconfig1 libfreetype6` runtime dependencies for servo-fetch @@ -19,6 +20,8 @@ - Fix: content deletion clears graph relationships via shared `TextContent::clear_ingested_children` - Fix: regression re suggestion of relationships - Internal: extracted duplicate entity+embedding patterns into `HasEmbedding` and `EmbeddingRecord` traits with generic `store_with_embedding`, `delete_by_source_id`, and `vector_search` on `SurrealDbClient`. +- Infra: `ort-version` file removed — version inlined in `flake.nix` and `devenv.nix`; `release.yml` reads it via `nix eval .#lib.ortVersion` from the plan job +- Infra: `screenshot-graph.webp` and `.dockerignore` deleted — stale artifacts from Dockerfile era ## 1.0.3 (2026-06-12) diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 1642654..0000000 --- a/Dockerfile +++ /dev/null @@ -1,52 +0,0 @@ -# === Builder === -FROM rust:1.91.1-bookworm AS builder -WORKDIR /usr/src/minne -RUN apt-get update && apt-get install -y --no-install-recommends \ - pkg-config clang cmake git && rm -rf /var/lib/apt/lists/* - -# Cache deps -COPY Cargo.toml Cargo.lock ./ -RUN mkdir -p api-router common retrieval-pipeline html-router ingestion-pipeline json-stream-parser main worker -COPY api-router/Cargo.toml ./api-router/ -COPY common/Cargo.toml ./common/ -COPY retrieval-pipeline/Cargo.toml ./retrieval-pipeline/ -COPY html-router/Cargo.toml ./html-router/ -COPY ingestion-pipeline/Cargo.toml ./ingestion-pipeline/ -COPY json-stream-parser/Cargo.toml ./json-stream-parser/ -COPY main/Cargo.toml ./main/ -RUN cargo 
build --release --bin main || true - -# Build -COPY . . -RUN cargo build --release --bin main - -# === Runtime === -FROM debian:bookworm-slim - -# Servo engine (for servo-fetch web scraping) + runtime deps + OpenMP for ORT -RUN apt-get update && apt-get install -y --no-install-recommends \ - libegl1 libegl-mesa0 libgles2 libfontconfig1 libfreetype6 \ - ca-certificates fonts-dejavu fonts-noto-color-emoji \ - libgomp1 libstdc++6 curl \ - && rm -rf /var/lib/apt/lists/* - -# ONNX Runtime (CPU). Version is read from ort-version (override with --build-arg ORT_VERSION=...). -COPY ort-version /tmp/ort-version -ARG ORT_VERSION -RUN ORT_VERSION="${ORT_VERSION:-$(tr -d '[:space:]' < /tmp/ort-version)}" && \ - mkdir -p /opt/onnxruntime && \ - curl -fsSL -o /tmp/ort.tgz \ - "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \ - tar -xzf /tmp/ort.tgz -C /opt/onnxruntime --strip-components=1 && rm /tmp/ort.tgz - -ENV SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \ - ORT_DYLIB_PATH=/opt/onnxruntime/lib/libonnxruntime.so - -# Non-root -RUN useradd -m appuser -USER appuser -WORKDIR /home/appuser - -COPY --from=builder /usr/src/minne/target/release/main /usr/local/bin/main -EXPOSE 3000 -CMD ["main"] diff --git a/common/Cargo.toml b/common/Cargo.toml index 34d4b59..2a93194 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -24,7 +24,7 @@ dom_smoothie = { workspace = true } axum_session = { workspace = true } axum_session_auth = { workspace = true } axum_session_surreal = { workspace = true} -axum_typed_multipart = { workspace = true} +axum_typed_multipart = { workspace = true} include_dir = { workspace = true } minijinja = { workspace = true } minijinja-autoreload = { workspace = true } diff --git a/devenv.nix b/devenv.nix index 3a0d3e4..74532f3 100644 --- a/devenv.nix +++ b/devenv.nix @@ -5,11 +5,11 @@ inputs, ... 
}: let - ortVersion = lib.removeSuffix "\n" (builtins.readFile "${toString ./.}/ort-version"); + ortVersion = "1.23.2"; _ortVersionCheck = if pkgs.onnxruntime.version == ortVersion then null - else throw "pkgs.onnxruntime.version (${pkgs.onnxruntime.version}) must match ort-version (${ortVersion})"; + else throw "pkgs.onnxruntime.version (${pkgs.onnxruntime.version}) must match ortVersion in flake.nix (${ortVersion})"; in { devenv.warnOnNewVersion = false; diff --git a/docker-compose.yml b/docker-compose.yml index 8f427b4..cc6002e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ services: minne: - build: . + image: ghcr.io/perstarkse/minne:latest container_name: minne_app ports: - "3000:3000" diff --git a/flake.nix b/flake.nix index 82de954..24220bb 100644 --- a/flake.nix +++ b/flake.nix @@ -14,7 +14,7 @@ crane, }: let inherit (nixpkgs.legacyPackages.x86_64-linux) lib; - ortVersion = lib.removeSuffix "\n" (builtins.readFile "${self}/ort-version"); + ortVersion = "1.23.2"; in flake-utils.lib.eachDefaultSystem (system: let pkgs = nixpkgs.legacyPackages.${system}; @@ -24,83 +24,182 @@ if pkgs.stdenv.isDarwin then "dylib" else "so"; - minne-pkg = - if pkgs.onnxruntime.version == ortVersion then - craneLib.buildPackage { + minneVersion = "1.0.3"; + + # Pre-download mozjs binary archive for mozjs_sys (servo dep). + # When updating mozjs_sys version in Cargo.lock, update this URL too. + mozjsArchive = pkgs.fetchurl { + url = "https://github.com/servo/mozjs/releases/download/mozjs-sys-v140.10.1-0/libmozjs-x86_64-unknown-linux-gnu.tar.gz"; + hash = "sha256-e5kW8HTg6Hrd3sGgU9bqFNTTf7wJCChFOwKE3xyYT4Q="; + }; + + # Extra paths (common/db, html-router/templates, html-router/assets) are + # embedded at compile time via include_dir! / minijinja_embed. 
+ commonArgs = { + version = minneVersion; src = lib.cleanSourceWith { src = ./.; - filter = let - extraPaths = [ + filter = path: type: + craneLib.filterCargoSources path type + || lib.any (x: lib.hasPrefix (toString x) (toString path)) [ (toString ./Cargo.lock) (toString ./common/db) (toString ./html-router/templates) (toString ./html-router/assets) ]; - in - path: type: let - p = toString path; - in - craneLib.filterCargoSources path type - || lib.any (x: lib.hasPrefix x p) extraPaths; }; + strictDeps = true; - pname = "minne"; - version = "1.0.3"; - # Uses nixpkgs rustc (stable). Release/Docker pin: rust-toolchain.toml (1.91.1). - doCheck = false; + buildInputs = [ + pkgs.openssl + pkgs.libglvnd + pkgs.onnxruntime + pkgs.fontconfig # .pc for yeslogic-fontconfig-sys (servo dep) + pkgs.libclang.lib # libclang.so for bindgen (servo dep) + ]; - nativeBuildInputs = [pkgs.pkg-config pkgs.rustfmt pkgs.makeWrapper]; - buildInputs = [pkgs.openssl pkgs.libglvnd pkgs.onnxruntime]; + nativeBuildInputs = [ + pkgs.pkg-config + pkgs.rustfmt + pkgs.makeWrapper + pkgs.python3 # needed by servo's stylo crate build.rs + pkgs.llvmPackages.llvm # llvm-objdump for mozjs_sys (servo dep) + pkgs.rustPlatform.bindgenHook # configures bindgen (servo deps) + ]; - postInstall = '' - wrapProgram $out/bin/main \ - --prefix LD_LIBRARY_PATH : ${pkgs.libglvnd}/lib \ - --set ORT_DYLIB_PATH ${pkgs.onnxruntime}/lib/libonnxruntime.${libExt} - for b in worker server; do - if [ -x "$out/bin/$b" ]; then - wrapProgram $out/bin/$b \ - --prefix LD_LIBRARY_PATH : ${pkgs.libglvnd}/lib \ - --set ORT_DYLIB_PATH ${pkgs.onnxruntime}/lib/libonnxruntime.${libExt} - fi - done - ''; - } - else - throw "pkgs.onnxruntime.version (${pkgs.onnxruntime.version}) must match ort-version (${ortVersion})"; + # Provide pre-downloaded mozjs archive so it doesn't need network + MOZJS_ARCHIVE = "${mozjsArchive}"; + }; + + # cargoBuild (not buildDepsOnly) avoids mkDummySrc breaking native build scripts. 
+ cargoArtifacts = craneLib.cargoBuild (commonArgs + // { + cargoArtifacts = null; + pname = "minne-deps"; + cargoExtraArgs = "--workspace"; + doCheck = false; + doInstallCargoArtifacts = true; + installPhaseCommand = ""; + }); + + minne-pkg = + if pkgs.onnxruntime.version == ortVersion + then + craneLib.buildPackage (commonArgs + // { + pname = "minne"; + version = minneVersion; + inherit cargoArtifacts; + doCheck = false; # checks are in separate derivations + doInstallCargoArtifacts = true; # for reuse by check derivations + + postInstall = '' + wrapProgram $out/bin/main \ + --prefix LD_LIBRARY_PATH : ${pkgs.libglvnd}/lib \ + --set ORT_DYLIB_PATH ${pkgs.onnxruntime}/lib/libonnxruntime.${libExt} + for b in worker server; do + if [ -x "$out/bin/$b" ]; then + wrapProgram $out/bin/$b \ + --prefix LD_LIBRARY_PATH : ${pkgs.libglvnd}/lib \ + --set ORT_DYLIB_PATH ${pkgs.onnxruntime}/lib/libonnxruntime.${libExt} + fi + done + ''; + }) + else throw "pkgs.onnxruntime.version (${pkgs.onnxruntime.version}) must match ortVersion in flake.nix (${ortVersion})"; + + dockerImage = pkgs.dockerTools.buildLayeredImage { + name = "minne"; + tag = minneVersion; + created = "now"; + + contents = [ + minne-pkg + pkgs.cacert + pkgs.bashInteractive + pkgs.libglvnd + pkgs.fontconfig.lib + pkgs.freetype + pkgs.stdenv.cc.cc.lib # libgomp (OpenMP) for ONNX Runtime + ]; + + maxLayers = 25; + + config = { + Cmd = ["${minne-pkg}/bin/main"]; + Env = [ + "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-certificates.crt" + "ORT_DYLIB_PATH=${pkgs.onnxruntime}/lib/libonnxruntime.${libExt}" + ]; + ExposedPorts = {"3000/tcp" = {};}; + User = "1000:1000"; # numeric UID:GID: this image definition adds no /etc/passwd, so a named user could not be resolved + }; + }; in { packages = { - minne-pkg = minne-pkg; + inherit minne-pkg dockerImage; default = minne-pkg; }; + apps = { - main = flake-utils.lib.mkApp { - drv = minne-pkg; - name = "main"; + main = { + type = "app"; + program = "${minne-pkg}/bin/main"; + meta.description = "Minne main server — API, web UI, and background worker"; }; - worker = 
flake-utils.lib.mkApp { - drv = minne-pkg; - name = "worker"; + worker = { + type = "app"; + program = "${minne-pkg}/bin/worker"; + meta.description = "Minne standalone background worker (ingestion, indexing, maintenance)"; }; - server = flake-utils.lib.mkApp { - drv = minne-pkg; - name = "server"; + server = { + type = "app"; + program = "${minne-pkg}/bin/server"; + meta.description = "Minne API-only server (no background worker)"; }; - default = flake-utils.lib.mkApp { - drv = minne-pkg; - name = "main"; + default = { + type = "app"; + program = "${minne-pkg}/bin/main"; + meta.description = "Minne main server — API, web UI, and background worker"; }; }; + checks = { ortVersion = pkgs.runCommand "ort-version-check" {} '' if [ "${pkgs.onnxruntime.version}" != "${ortVersion}" ]; then - echo "pkgs.onnxruntime.version is ${pkgs.onnxruntime.version}, but ort-version pins ${ortVersion}" >&2 - echo "Update ort-version or wait for nixpkgs to catch up." >&2 + echo "pkgs.onnxruntime.version is ${pkgs.onnxruntime.version}, but flake pins ${ortVersion}" >&2 + echo "Update ortVersion in flake.nix or wait for nixpkgs to catch up." 
>&2 exit 1 fi touch $out ''; + + minne-clippy = craneLib.cargoClippy (commonArgs + // { + cargoArtifacts = minne-pkg; + pname = "minne"; + cargoClippyExtraArgs = "--all-targets -- --deny warnings"; + }); + + minne-test = craneLib.cargoTest (commonArgs + // { + cargoArtifacts = minne-pkg; + pname = "minne"; + buildInputs = commonArgs.buildInputs ++ [ pkgs.cacert ]; + SSL_CERT_FILE = "${pkgs.cacert}/etc/ssl/certs/ca-certificates.crt"; + cargoTestExtraArgs = "--lib --bins"; + }); + + minne-fmt = craneLib.cargoFmt { + pname = "minne-fmt"; + version = minneVersion; + src = craneLib.cleanCargoSource ./.; + }; + }; + }) + // { + lib = { + inherit ortVersion; }; - }) // { - ortVersion = ortVersion; }; } diff --git a/html-router/src/lib.rs b/html-router/src/lib.rs index 4a78b79..afe2269 100644 --- a/html-router/src/lib.rs +++ b/html-router/src/lib.rs @@ -4,6 +4,9 @@ //! the template middleware renders them with shared layout context. Route composition //! and middleware layering are handled by [`router_factory::RouterFactory`]. +// minijinja_embed output (release builds) triggers these lints. +#![allow(unused_variables, clippy::expect_used, clippy::missing_panics_doc)] + pub mod html_state; pub mod middlewares; pub mod router_factory; diff --git a/html-router/src/router_factory.rs b/html-router/src/router_factory.rs index 9c796fe..2778b5a 100644 --- a/html-router/src/router_factory.rs +++ b/html-router/src/router_factory.rs @@ -22,13 +22,13 @@ macro_rules! 
create_asset_service { let crate_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); let assets_path = crate_dir.join($relative_path); tracing::debug!("Assets: Serving from filesystem: {:?}", assets_path); - tower_http::services::ServeDir::new(assets_path) + tower_http::services::ServeDir::new(&assets_path) } #[cfg(not(debug_assertions))] { - tracing::debug!("Assets: Serving embedded directory"); static ASSETS_DIR: include_dir::Dir<'static> = include_dir::include_dir!("$CARGO_MANIFEST_DIR/assets"); + tracing::debug!(directory = %$relative_path, "Assets: Serving embedded directory"); tower_serve_static::ServeDir::new(&ASSETS_DIR) } }}; diff --git a/ort-version b/ort-version deleted file mode 100644 index 14bee92..0000000 --- a/ort-version +++ /dev/null @@ -1 +0,0 @@ -1.23.2 diff --git a/screenshot-dashboard.webp b/screenshot-dashboard.webp deleted file mode 100644 index 32f5e95..0000000 Binary files a/screenshot-dashboard.webp and /dev/null differ