feat: docker & docker-compose example

This commit is contained in:
Per Stark
2025-05-05 13:44:57 +02:00
parent 6ad625befc
commit 322d1ec318
7 changed files with 163 additions and 271 deletions

40
.dockerignore Normal file
View File

@@ -0,0 +1,40 @@
# Paths listed here are excluded from the Docker build context.
# Git stuff
.git/
.gitignore
.github
# Node build artifacts
**/node_modules/
# Nix/Devenv environment files
.direnv/
.devenv/
devenv.lock
devenv.nix
devenv.yaml
# Docker Compose file
docker-compose.yml
# Nix/Devenv environment files (continued)
.envrc
.devenv.flake.nix
flake.lock
flake.nix
# Rust build artifacts (crucial for multi-stage builds)
**/target/
# Runtime data directories
data/
database/
# Local environment config (sensitive)
.env
# IDE specific
.vscode/
.idea/
# OS specific
.DS_Store
Thumbs.db
# Logs / Temporary files
*.log

269
Cargo.lock generated
View File

@@ -247,15 +247,6 @@ dependencies = [
"num-traits",
]
[[package]]
name = "arbitrary"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "argon2"
version = "0.5.3"
@@ -517,7 +508,7 @@ dependencies = [
"quote",
"serde",
"serde_json",
"ureq 2.12.1",
"ureq",
]
[[package]]
@@ -953,25 +944,6 @@ dependencies = [
"serde",
]
[[package]]
name = "bzip2"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47"
dependencies = [
"bzip2-sys",
]
[[package]]
name = "bzip2-sys"
version = "0.1.13+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14"
dependencies = [
"cc",
"pkg-config",
]
[[package]]
name = "castaway"
version = "0.2.3"
@@ -987,8 +959,6 @@ version = "1.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8691782945451c1c383942c4874dbe63814f61cb57ef773cda2972682b7bb3c0"
dependencies = [
"jobserver",
"libc",
"shlex",
]
@@ -1414,21 +1384,6 @@ dependencies = [
"libc",
]
[[package]]
name = "crc"
version = "3.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636"
dependencies = [
"crc-catalog",
]
[[package]]
name = "crc-catalog"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
[[package]]
name = "crc32fast"
version = "1.4.2"
@@ -1614,12 +1569,6 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
[[package]]
name = "deflate64"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da692b8d1080ea3045efaab14434d40468c3d8657e42abddfffca87b428f4c1b"
[[package]]
name = "deranged"
version = "0.4.0"
@@ -1630,17 +1579,6 @@ dependencies = [
"serde",
]
[[package]]
name = "derive_arbitrary"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.101",
]
[[package]]
name = "derive_builder"
version = "0.20.2"
@@ -1720,15 +1658,6 @@ dependencies = [
"subtle",
]
[[package]]
name = "directories"
version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16f5094c54661b38d03bd7e50df373292118db60b585c08a411c6d840017fe7d"
dependencies = [
"dirs-sys",
]
[[package]]
name = "dirs-next"
version = "2.0.0"
@@ -1739,18 +1668,6 @@ dependencies = [
"dirs-sys-next",
]
[[package]]
name = "dirs-sys"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab"
dependencies = [
"libc",
"option-ext",
"redox_users 0.5.0",
"windows-sys 0.59.0",
]
[[package]]
name = "dirs-sys-next"
version = "0.1.2"
@@ -1758,7 +1675,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
dependencies = [
"libc",
"redox_users 0.4.6",
"redox_users",
"winapi",
]
@@ -2410,13 +2327,13 @@ dependencies = [
[[package]]
name = "headless_chrome"
version = "1.0.17"
source = "git+https://github.com/rust-headless-chrome/rust-headless-chrome#8b66992826245cbf60377d619fc780f8c45abf8e"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c268ea01c2902b2acb382c1fae26818113dd661e0dba036a893f0ba40f00cdd8"
dependencies = [
"anyhow",
"auto_generate_cdp",
"base64 0.22.1",
"derive_builder",
"directories",
"log",
"rand 0.9.1",
"regex",
@@ -2425,12 +2342,9 @@ dependencies = [
"tempfile",
"thiserror 2.0.12",
"tungstenite 0.26.2",
"ureq 3.0.11",
"url",
"walkdir",
"which",
"winreg",
"zip",
]
[[package]]
@@ -3014,16 +2928,6 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jobserver"
version = "0.1.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a"
dependencies = [
"getrandom 0.3.2",
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.77"
@@ -3214,27 +3118,6 @@ dependencies = [
"hashbrown 0.15.3",
]
[[package]]
name = "lzma-rs"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "297e814c836ae64db86b36cf2a557ba54368d03f6afcd7d947c266692f71115e"
dependencies = [
"byteorder",
"crc",
]
[[package]]
name = "lzma-sys"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "mac"
version = "0.1.1"
@@ -3771,12 +3654,6 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "option-ext"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
[[package]]
name = "ordered-multimap"
version = "0.7.3"
@@ -4416,17 +4293,6 @@ dependencies = [
"thiserror 1.0.69",
]
[[package]]
name = "redox_users"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b"
dependencies = [
"getrandom 0.2.16",
"libredox",
"thiserror 2.0.12",
]
[[package]]
name = "ref-cast"
version = "1.0.24"
@@ -5245,12 +5111,6 @@ dependencies = [
"libc",
]
[[package]]
name = "simd-adler32"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
[[package]]
name = "simdutf8"
version = "0.1.5"
@@ -6348,36 +6208,6 @@ dependencies = [
"webpki-roots",
]
[[package]]
name = "ureq"
version = "3.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7a3e9af6113ecd57b8c63d3cd76a385b2e3881365f1f489e54f49801d0c83ea"
dependencies = [
"base64 0.22.1",
"flate2",
"log",
"percent-encoding",
"rustls",
"rustls-pemfile",
"rustls-pki-types",
"ureq-proto",
"utf-8",
"webpki-roots",
]
[[package]]
name = "ureq-proto"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fadf18427d33828c311234884b7ba2afb57143e6e7e69fda7ee883b624661e36"
dependencies = [
"base64 0.22.1",
"http",
"httparse",
"log",
]
[[package]]
name = "url"
version = "2.5.4"
@@ -7041,15 +6871,6 @@ version = "0.8.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a62ce76d9b56901b19a74f19431b0d8b3bc7ca4ad685a746dfd78ca8f4fc6bda"
[[package]]
name = "xz2"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
dependencies = [
"lzma-sys",
]
[[package]]
name = "yaml-rust2"
version = "0.10.1"
@@ -7151,20 +6972,6 @@ name = "zeroize"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
dependencies = [
"zeroize_derive",
]
[[package]]
name = "zeroize_derive"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.101",
]
[[package]]
name = "zerovec"
@@ -7187,71 +6994,3 @@ dependencies = [
"quote",
"syn 2.0.101",
]
[[package]]
name = "zip"
version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dcb24d0152526ae49b9b96c1dcf71850ca1e0b882e4e28ed898a93c41334744"
dependencies = [
"aes",
"arbitrary",
"bzip2",
"constant_time_eq",
"crc32fast",
"crossbeam-utils",
"deflate64",
"flate2",
"getrandom 0.3.2",
"hmac",
"indexmap 2.9.0",
"lzma-rs",
"memchr",
"pbkdf2",
"sha1",
"time",
"xz2",
"zeroize",
"zopfli",
"zstd",
]
[[package]]
name = "zopfli"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edfc5ee405f504cd4984ecc6f14d02d55cfda60fa4b689434ef4102aae150cd7"
dependencies = [
"bumpalo",
"crc32fast",
"log",
"simd-adler32",
]
[[package]]
name = "zstd"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.15+zstd.1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237"
dependencies = [
"cc",
"pkg-config",
]

53
Dockerfile Normal file
View File

@@ -0,0 +1,53 @@
# === Builder Stage ===
# Builds the `main` binary for the x86_64 musl target inside the muslrust image.
FROM clux/muslrust:1.86.0-stable AS builder
WORKDIR /usr/src/minne

# Copy only the workspace manifests first, so the layers below can be reused
# by Docker's layer cache as long as the manifests do not change.
COPY Cargo.toml Cargo.lock ./
# NOTE(review): a `worker` directory is created here but no `worker/Cargo.toml`
# is copied below - confirm whether `worker` is a workspace member.
RUN mkdir -p api-router common composite-retrieval html-router ingestion-pipeline json-stream-parser main worker
COPY api-router/Cargo.toml ./api-router/
COPY common/Cargo.toml ./common/
COPY composite-retrieval/Cargo.toml ./composite-retrieval/
COPY html-router/Cargo.toml ./html-router/
COPY ingestion-pipeline/Cargo.toml ./ingestion-pipeline/
COPY json-stream-parser/Cargo.toml ./json-stream-parser/
COPY main/Cargo.toml ./main/

# Best-effort dependency pre-build for the MUSL target. The crate sources have
# not been copied yet, so a failure is tolerated (`|| true`) instead of
# aborting the image build.
# NOTE(review): with no `src/` files present this step may fail before any
# dependency is compiled - verify that it actually warms the cache.
RUN cargo build --release --target x86_64-unknown-linux-musl --bin main --features ingestion-pipeline/docker || true

# Copy the rest of the source code
COPY . .

# Build the final application binary with the MUSL target
RUN cargo build --release --target x86_64-unknown-linux-musl --bin main --features ingestion-pipeline/docker

# === Runtime Stage ===
# NOTE(review): `alpine:latest` is a floating tag; consider pinning a release
# so rebuilds are reproducible.
FROM alpine:latest

# Chromium plus the libraries, certificates and fonts installed alongside it.
# `--no-cache` fetches the package index on demand and stores nothing on disk,
# so neither a prior `apk update` nor a manual cache cleanup is needed.
RUN apk add --no-cache \
    chromium \
    nss \
    freetype \
    harfbuzz \
    ca-certificates \
    ttf-freefont \
    font-noto-emoji

ENV CHROME_BIN=/usr/bin/chromium-browser \
    CHROME_PATH=/usr/lib/chromium/ \
    SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt

# Create a non-root user to run the application
RUN adduser -D -h /home/appuser appuser
WORKDIR /home/appuser
USER appuser

# Copy the compiled binary from the builder stage (note the target path)
COPY --from=builder /usr/src/minne/target/x86_64-unknown-linux-musl/release/main /usr/local/bin/main

EXPOSE 3000
# EXPOSE 8000-9000

CMD ["main"]

44
docker-compose.yml Normal file
View File

@@ -0,0 +1,44 @@
# Example stack: the Minne application together with a SurrealDB instance.
# The top-level `version` key is intentionally omitted; it is obsolete in the
# Compose Specification.
services:
  minne:
    build: .
    container_name: minne_app
    ports:
      - "3000:3000"
    environment:
      SURREALDB_ADDRESS: "ws://surrealdb:8000"
      # Must match the --user / --pass values given to the surrealdb service below.
      SURREALDB_USERNAME: "root_user"
      SURREALDB_PASSWORD: "root_password"
      SURREALDB_DATABASE: "test"
      SURREALDB_NAMESPACE: "test"
      # Placeholder value - replace with a real key before use.
      OPENAI_API_KEY: "sk-key"
      # RUST_LOG: "info"
    depends_on:
      - surrealdb
    networks:
      - minne-net
    # `depends_on` only orders container start-up, so the application is
    # delayed by a fixed sleep to give SurrealDB time to start.
    command: ["sh", "-c", "echo 'Waiting for SurrealDB to start...' && sleep 10 && echo 'Starting application...' && /usr/local/bin/main"]

  surrealdb:
    image: surrealdb/surrealdb:latest
    container_name: minne_surrealdb
    ports:
      - "8000:8000"
    volumes:
      - ./database:/database # Mounts a 'database' folder from your project directory
    command: >
      start
      --log debug
      --user root_user
      --pass root_password
      rocksdb:./database/database.db
    networks:
      - minne-net

networks:
  minne-net:
    driver: bridge

View File

@@ -22,8 +22,10 @@ chrono = { version = "0.4.39", features = ["serde"] }
text-splitter = "0.18.1"
url = { version = "2.5.2", features = ["serde"] }
uuid = { version = "1.10.0", features = ["v4", "serde"] }
headless_chrome = { git = "https://github.com/rust-headless-chrome/rust-headless-chrome", features = ["fetch"] }
headless_chrome = "1.0.17"
common = { path = "../common" }
composite-retrieval = { path = "../composite-retrieval" }
[features]
docker = []

View File

@@ -16,7 +16,8 @@ use common::{
},
};
use dom_smoothie::{Article, Readability, TextMode};
use headless_chrome::Browser;
use headless_chrome::{Browser, LaunchOptionsBuilder};
use std::io::{Seek, SeekFrom};
use tempfile::NamedTempFile;
use tracing::{error, info};
@@ -76,7 +77,6 @@ pub async fn to_text_content(
}
}
}
use std::io::{Seek, SeekFrom}; // <-- Add Seek and SeekFrom
/// Fetches web content from a URL, extracts the main article text as Markdown,
/// captures a screenshot, and stores the screenshot returning [`FileInfo`].
@@ -106,7 +106,22 @@ async fn fetch_article_from_url(
// Instantiate timer
let now = Instant::now();
// Setup browser, navigate and wait
let browser = Browser::default()?;
let browser = {
#[cfg(feature = "docker")]
{
// Use this when compiling for docker
let options = LaunchOptionsBuilder::default()
.sandbox(false)
.build()
.map_err(|e| AppError::InternalError(e.to_string()))?;
Browser::new(options)?
}
#[cfg(not(feature = "docker"))]
{
// Use this otherwise
Browser::default()?
}
};
let tab = browser.new_tab()?;
let page = tab.navigate_to(url)?;
let loaded_page = page.wait_until_navigated()?;

1
result
View File

@@ -1 +0,0 @@
/nix/store/d4n7jbj3j7ql8v90kn4zlf4kj2zq81sy-minne