chore: dep updates & kv-mem separation to test feature

docker builder update
This commit is contained in:
Per Stark
2026-02-15 08:22:54 +01:00
parent b0b01182d7
commit 1490852a09
15 changed files with 1737 additions and 1664 deletions

3199
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -40,7 +40,7 @@ serde_json = "1.0.128"
serde = { version = "1", features = ["derive"] }
sha2 = "0.10.8"
surrealdb-migrations = "2.2.2"
surrealdb = { version = "2", features = ["kv-mem"] }
surrealdb = { version = "2" }
tempfile = "3.12.0"
text-splitter = { version = "0.18.1", features = ["markdown", "tokenizers"] }
tokenizers = { version = "0.20.4", features = ["http"] }

View File

@@ -1,5 +1,5 @@
# === Builder ===
FROM rust:1.86-bookworm AS builder
FROM rust:1.89-bookworm AS builder
WORKDIR /usr/src/minne
RUN apt-get update && apt-get install -y --no-install-recommends \
pkg-config clang cmake git && rm -rf /var/lib/apt/lists/*
@@ -30,8 +30,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libgomp1 libstdc++6 curl \
&& rm -rf /var/lib/apt/lists/*
# ONNX Runtime (CPU). Change if you bump ort.
ARG ORT_VERSION=1.22.0
# ONNX Runtime (CPU). Keep in sync with ort crate requirements.
ARG ORT_VERSION=1.23.2
RUN mkdir -p /opt/onnxruntime && \
curl -fsSL -o /tmp/ort.tgz \
"https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \

View File

@@ -16,7 +16,7 @@ tracing = { workspace = true }
anyhow = { workspace = true }
thiserror = { workspace = true }
serde_json = { workspace = true }
surrealdb = { workspace = true, features = ["kv-mem"] }
surrealdb = { workspace = true }
async-openai = { workspace = true }
futures = { workspace = true }
tempfile = { workspace = true }
@@ -49,4 +49,7 @@ fastembed = { workspace = true }
[features]
test-utils = []
test-utils = ["surrealdb/kv-mem"]
[dev-dependencies]
surrealdb = { workspace = true, features = ["kv-mem"] }

View File

@@ -3,10 +3,10 @@
"devenv": {
"locked": {
"dir": "src/modules",
"lastModified": 1761839147,
"lastModified": 1771066302,
"owner": "cachix",
"repo": "devenv",
"rev": "bb7849648b68035f6b910120252c22b28195cf54",
"rev": "1b355dec9bddbaddbe4966d6fc30d7aa3af8575b",
"type": "github"
},
"original": {
@@ -22,10 +22,10 @@
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1761893049,
"lastModified": 1771052630,
"owner": "nix-community",
"repo": "fenix",
"rev": "c2ac9a5c0d6d16630c3b225b874bd14528d1abe6",
"rev": "d0555da98576b8611c25df0c208e51e9a182d95f",
"type": "github"
},
"original": {
@@ -37,14 +37,14 @@
"flake-compat": {
"flake": false,
"locked": {
"lastModified": 1761588595,
"owner": "edolstra",
"lastModified": 1767039857,
"owner": "NixOS",
"repo": "flake-compat",
"rev": "f387cd2afec9419c8ee37694406ca490c3f34ee5",
"rev": "5edf11c44bc78a0d334f6334cdaf7d60d732daab",
"type": "github"
},
"original": {
"owner": "edolstra",
"owner": "NixOS",
"repo": "flake-compat",
"type": "github"
}
@@ -58,10 +58,10 @@
]
},
"locked": {
"lastModified": 1760663237,
"lastModified": 1770726378,
"owner": "cachix",
"repo": "git-hooks.nix",
"rev": "ca5b894d3e3e151ffc1db040b6ce4dcc75d31c37",
"rev": "5eaaedde414f6eb1aea8b8525c466dc37bba95ae",
"type": "github"
},
"original": {
@@ -78,10 +78,10 @@
]
},
"locked": {
"lastModified": 1709087332,
"lastModified": 1762808025,
"owner": "hercules-ci",
"repo": "gitignore.nix",
"rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
"rev": "cb5e3fdca1de58ccbc3ef53de65bd372b48f567c",
"type": "github"
},
"original": {
@@ -92,10 +92,10 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1761672384,
"lastModified": 1771008912,
"owner": "nixos",
"repo": "nixpkgs",
"rev": "08dacfca559e1d7da38f3cf05f1f45ee9bfd213c",
"rev": "a82ccc39b39b621151d6732718e3e250109076fa",
"type": "github"
},
"original": {
@@ -107,10 +107,10 @@
},
"nixpkgs_2": {
"locked": {
"lastModified": 1761880412,
"lastModified": 1770843696,
"owner": "nixos",
"repo": "nixpkgs",
"rev": "a7fc11be66bdfb5cdde611ee5ce381c183da8386",
"rev": "2343bbb58f99267223bc2aac4fc9ea301a155a16",
"type": "github"
},
"original": {
@@ -135,10 +135,10 @@
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1761849405,
"lastModified": 1771007332,
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "f7de8ae045a5fe80f1203c5a1c3015b05f7c3550",
"rev": "bbc84d335fbbd9b3099d3e40c7469ee57dbd1873",
"type": "github"
},
"original": {
@@ -155,10 +155,10 @@
]
},
"locked": {
"lastModified": 1761878277,
"lastModified": 1771038269,
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "6604534e44090c917db714faa58d47861657690c",
"rev": "d7a86c8a4df49002446737603a3e0d7ef91a9637",
"type": "github"
},
"original": {

View File

@@ -33,3 +33,4 @@ clap = { version = "4.4", features = ["derive", "env"] }
[dev-dependencies]
tempfile = { workspace = true }
common = { path = "../common", features = ["test-utils"] }

View File

@@ -9,6 +9,8 @@ use std::{
use anyhow::{anyhow, Context, Result};
use async_openai::Client;
use chrono::Utc;
#[cfg(not(test))]
use common::utils::config::get_config;
use common::{
storage::{
db::SurrealDbClient,
@@ -421,11 +423,7 @@ async fn ingest_paragraph_batch(
return Ok(Vec::new());
}
let namespace = format!("ingest_eval_{}", Uuid::new_v4());
let db = Arc::new(
SurrealDbClient::memory(&namespace, "corpus")
.await
.context("creating in-memory surrealdb for ingestion")?,
);
let db = create_ingest_db(&namespace).await?;
db.apply_migrations()
.await
.context("applying migrations for ingestion")?;
@@ -487,6 +485,29 @@ async fn ingest_paragraph_batch(
Ok(shards)
}
/// Test-build variant: opens an in-memory SurrealDB client for ingestion.
///
/// The client is created with the given `namespace` and the fixed second
/// argument `"corpus"` (presumably the database name — confirm against
/// `SurrealDbClient::memory`), then wrapped in an `Arc` for sharing.
///
/// # Errors
/// Returns the underlying failure annotated with
/// "creating in-memory surrealdb for ingestion".
#[cfg(test)]
async fn create_ingest_db(namespace: &str) -> Result<Arc<SurrealDbClient>> {
    SurrealDbClient::memory(namespace, "corpus")
        .await
        .map(Arc::new)
        .context("creating in-memory surrealdb for ingestion")
}
/// Non-test variant: opens a SurrealDB client for ingestion using the
/// address and credentials from the application config.
///
/// The client is created with the given `namespace` and the fixed final
/// argument `"corpus"` (presumably the database name — confirm against
/// `SurrealDbClient::new`), then wrapped in an `Arc` for sharing.
///
/// # Errors
/// Fails if the app config cannot be loaded or if the client cannot be
/// created; each failure carries its own context message.
#[cfg(not(test))]
async fn create_ingest_db(namespace: &str) -> Result<Arc<SurrealDbClient>> {
    let app_config = get_config().context("loading app config for ingestion database")?;

    SurrealDbClient::new(
        &app_config.surrealdb_address,
        &app_config.surrealdb_username,
        &app_config.surrealdb_password,
        namespace,
        "corpus",
    )
    .await
    .map(Arc::new)
    .context("creating surrealdb database for ingestion")
}
#[allow(clippy::too_many_arguments)]
async fn ingest_single_paragraph(
pipeline: Arc<IngestionPipeline>,

6
flake.lock generated
View File

@@ -35,11 +35,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1761672384,
"narHash": "sha256-o9KF3DJL7g7iYMZq9SWgfS1BFlNbsm6xplRjVlOCkXI=",
"lastModified": 1771008912,
"narHash": "sha256-gf2AmWVTs8lEq7z/3ZAsgnZDhWIckkb+ZnAo5RzSxJg=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "08dacfca559e1d7da38f3cf05f1f45ee9bfd213c",
"rev": "a82ccc39b39b621151d6732718e3e250109076fa",
"type": "github"
},
"original": {

View File

@@ -41,5 +41,8 @@ common = { path = "../common" }
retrieval-pipeline = { path = "../retrieval-pipeline" }
json-stream-parser = { path = "../json-stream-parser" }
[dev-dependencies]
common = { path = "../common", features = ["test-utils"] }
[build-dependencies]
minijinja-embed = { version = "2.8.0" }

View File

@@ -1,4 +1,4 @@
/*! tailwindcss v4.1.16 | MIT License | https://tailwindcss.com */
/*! tailwindcss v4.1.18 | MIT License | https://tailwindcss.com */
@layer properties;
@layer theme, base, components, utilities;
@layer theme {
@@ -44,6 +44,7 @@
--leading-snug: 1.375;
--leading-relaxed: 1.625;
--ease-out: cubic-bezier(0, 0, 0.2, 1);
--ease-in-out: cubic-bezier(0.4, 0, 0.2, 1);
--animate-pulse: pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite;
--default-transition-duration: 150ms;
--default-transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);

View File

@@ -6,7 +6,7 @@ use async_stream::stream;
use axum::{
extract::{Query, State},
response::{
sse::{Event, KeepAlive},
sse::{Event, KeepAlive, KeepAliveStream},
Sse,
},
};
@@ -42,10 +42,19 @@ use crate::{html_state::HtmlState, AuthSessionType};
use super::reference_validation::{collect_reference_ids_from_retrieval, validate_references};
/// Pinned, boxed, `Send` stream whose items are either an SSE `Event` or an `axum::Error`.
type EventStream = Pin<Box<dyn Stream<Item = Result<Event, axum::Error>> + Send>>;
/// `Sse` response over an `EventStream` wrapped in axum's `KeepAliveStream`,
/// i.e. the type returned by calling `.keep_alive(..)` on `Sse::new(stream)`.
type SseResponse = Sse<KeepAliveStream<EventStream>>;
/// Wraps `stream` in an SSE response configured with a keep-alive that uses
/// a 15-second interval and the text `keep-alive`.
fn sse_with_keep_alive(stream: EventStream) -> SseResponse {
    let keep_alive = KeepAlive::new()
        .interval(Duration::from_secs(15))
        .text("keep-alive");

    Sse::new(stream).keep_alive(keep_alive)
}
// Builds a single-item event stream that emits one SSE "error" event carrying `message`.
fn create_error_stream(
message: impl Into<String>,
) -> Pin<Box<dyn Stream<Item = Result<Event, axum::Error>> + Send>> {
fn create_error_stream(message: impl Into<String>) -> EventStream {
let message = message.into();
stream::once(async move { Ok(Event::default().event("error").data(message)) }).boxed()
}
@@ -55,13 +64,10 @@ async fn get_message_and_user(
db: &SurrealDbClient,
current_user: Option<User>,
message_id: &str,
) -> Result<
(Message, User, Conversation, Vec<Message>, Option<Message>),
Sse<Pin<Box<dyn Stream<Item = Result<Event, axum::Error>> + Send>>>,
> {
) -> Result<(Message, User, Conversation, Vec<Message>, Option<Message>), SseResponse> {
// Check authentication
let Some(user) = current_user else {
return Err(Sse::new(create_error_stream(
return Err(sse_with_keep_alive(create_error_stream(
"You must be signed in to use this feature",
)));
};
@@ -70,13 +76,13 @@ async fn get_message_and_user(
let message = match db.get_item::<Message>(message_id).await {
Ok(Some(message)) => message,
Ok(None) => {
return Err(Sse::new(create_error_stream(
return Err(sse_with_keep_alive(create_error_stream(
"Message not found: the specified message does not exist",
)))
}
Err(e) => {
error!("Database error retrieving message {}: {:?}", message_id, e);
return Err(Sse::new(create_error_stream(
return Err(sse_with_keep_alive(create_error_stream(
"Failed to retrieve message: database error",
)));
}
@@ -88,7 +94,7 @@ async fn get_message_and_user(
{
Err(e) => {
error!("Database error retrieving message {}: {:?}", message_id, e);
return Err(Sse::new(create_error_stream(
return Err(sse_with_keep_alive(create_error_stream(
"Failed to retrieve message: database error",
)));
}
@@ -96,19 +102,19 @@ async fn get_message_and_user(
};
let Some(message_index) = find_message_index(&history, message_id) else {
return Err(Sse::new(create_error_stream(
return Err(sse_with_keep_alive(create_error_stream(
"Message not found in conversation history",
)));
};
let Some(message_from_history) = history.get(message_index) else {
return Err(Sse::new(create_error_stream(
return Err(sse_with_keep_alive(create_error_stream(
"Message not found in conversation history",
)));
};
if message_from_history.role != MessageRole::User {
return Err(Sse::new(create_error_stream(
return Err(sse_with_keep_alive(create_error_stream(
"Only user messages can be used to generate a response",
)));
}
@@ -144,10 +150,7 @@ fn history_before_message(messages: &[Message], message_index: usize) -> Vec<Mes
messages.iter().take(message_index).cloned().collect()
}
fn create_replayed_response_stream(
state: &HtmlState,
existing_ai_message: Message,
) -> Sse<Pin<Box<dyn Stream<Item = Result<Event, axum::Error>> + Send>>> {
fn create_replayed_response_stream(state: &HtmlState, existing_ai_message: Message) -> SseResponse {
let references_event = if existing_ai_message
.references
.as_ref()
@@ -179,11 +182,7 @@ fn create_replayed_response_stream(
yield Ok(Event::default().event("close_stream").data("Stream complete"));
};
Sse::new(event_stream.boxed()).keep_alive(
KeepAlive::new()
.interval(Duration::from_secs(15))
.text("keep-alive"),
)
sse_with_keep_alive(event_stream.boxed())
}
#[derive(Deserialize)]
@@ -209,7 +208,7 @@ pub async fn get_response_stream(
State(state): State<HtmlState>,
auth: AuthSessionType,
Query(params): Query<QueryParams>,
) -> Sse<Pin<Box<dyn Stream<Item = Result<Event, axum::Error>> + Send>>> {
) -> SseResponse {
// 1. Authentication and initial data validation
let (user_message, user, _conversation, history, existing_ai_response) =
match get_message_and_user(&state.db, auth.current_user, &params.message_id).await {
@@ -249,7 +248,7 @@ pub async fn get_response_stream(
{
Ok(result) => result,
Err(_e) => {
return Sse::new(create_error_stream("Failed to retrieve knowledge"));
return sse_with_keep_alive(create_error_stream("Failed to retrieve knowledge"));
}
};
@@ -269,17 +268,17 @@ pub async fn get_response_stream(
let formatted_user_message =
create_user_message_with_history(&context_json, &history, &user_message.content);
let Ok(settings) = SystemSettings::get_current(&state.db).await else {
return Sse::new(create_error_stream("Failed to retrieve system settings"));
return sse_with_keep_alive(create_error_stream("Failed to retrieve system settings"));
};
let Ok(request) = create_chat_request(formatted_user_message, &settings) else {
return Sse::new(create_error_stream("Failed to create chat request"));
return sse_with_keep_alive(create_error_stream("Failed to create chat request"));
};
// 4. Set up the OpenAI stream
let openai_stream = match state.openai_client.chat().create_stream(request).await {
Ok(stream) => stream,
Err(_e) => {
return Sse::new(create_error_stream("Failed to create OpenAI stream"));
return sse_with_keep_alive(create_error_stream("Failed to create OpenAI stream"));
}
};
@@ -460,11 +459,7 @@ pub async fn get_response_stream(
.data("Stream complete"))
}));
Sse::new(event_stream.boxed()).keep_alive(
KeepAlive::new()
.interval(Duration::from_secs(15))
.text("keep-alive"),
)
sse_with_keep_alive(event_stream.boxed())
}
struct StreamParserState {

View File

@@ -4,7 +4,7 @@ use axum::{
extract::{Query, State},
http::StatusCode,
response::{
sse::{Event, KeepAlive},
sse::{Event, KeepAlive, KeepAliveStream},
IntoResponse, Response, Sse,
},
};
@@ -36,6 +36,17 @@ use crate::{
AuthSessionType,
};
/// Pinned, boxed, `Send` stream whose items are either an SSE `Event` or an `axum::Error`.
type EventStream = Pin<Box<dyn Stream<Item = Result<Event, axum::Error>> + Send>>;
/// `Sse` response over an `EventStream` wrapped in axum's `KeepAliveStream`,
/// i.e. the type returned by calling `.keep_alive(..)` on `Sse::new(stream)`.
type TaskSse = Sse<KeepAliveStream<EventStream>>;
/// Wraps `stream` in an SSE response configured with a keep-alive that uses
/// a 15-second interval and the text `keep-alive-ping`.
fn sse_with_keep_alive(stream: EventStream) -> TaskSse {
    let keep_alive = KeepAlive::new()
        .interval(Duration::from_secs(15))
        .text("keep-alive-ping");

    Sse::new(stream).keep_alive(keep_alive)
}
pub async fn show_ingest_form(
State(state): State<HtmlState>,
RequireUser(user): RequireUser,
@@ -158,9 +169,7 @@ pub struct QueryParams {
task_id: String,
}
fn create_error_stream(
message: impl Into<String>,
) -> Pin<Box<dyn Stream<Item = Result<Event, axum::Error>> + Send>> {
fn create_error_stream(message: impl Into<String>) -> EventStream {
let message = message.into();
stream::once(async move { Ok(Event::default().event("error").data(message)) }).boxed()
}
@@ -169,13 +178,13 @@ pub async fn get_task_updates_stream(
State(state): State<HtmlState>,
auth: AuthSessionType,
Query(params): Query<QueryParams>,
) -> Sse<Pin<Box<dyn Stream<Item = Result<Event, axum::Error>> + Send>>> {
) -> TaskSse {
let task_id = params.task_id.clone();
let db = state.db.clone();
// 1. Check for authenticated user
let Some(current_user) = auth.current_user else {
return Sse::new(create_error_stream("User not authenticated"));
return sse_with_keep_alive(create_error_stream("User not authenticated"));
};
// 2. Fetch task for initial authorization and to ensure it exists
@@ -183,7 +192,7 @@ pub async fn get_task_updates_stream(
Ok(Some(task)) => {
// 3. Validate user ownership
if task.user_id != current_user.id {
return Sse::new(create_error_stream(
return sse_with_keep_alive(create_error_stream(
"Access denied: You do not have permission to view updates for this task.",
));
}
@@ -269,18 +278,14 @@ pub async fn get_task_updates_stream(
}
};
Sse::new(sse_stream.boxed()).keep_alive(
KeepAlive::new()
.interval(Duration::from_secs(15))
.text("keep-alive-ping"),
)
sse_with_keep_alive(sse_stream.boxed())
}
Ok(None) => Sse::new(create_error_stream(format!(
Ok(None) => sse_with_keep_alive(create_error_stream(format!(
"Task with ID '{task_id}' not found."
))),
Err(e) => {
error!("Failed to fetch task '{task_id}' for authorization: {e:?}");
Sse::new(create_error_stream(
sse_with_keep_alive(create_error_stream(
"An error occurred while retrieving task details. Please try again later.",
))
}

View File

@@ -38,3 +38,6 @@ retrieval-pipeline = { path = "../retrieval-pipeline" }
[features]
docker = []
[dev-dependencies]
common = { path = "../common", features = ["test-utils"] }

View File

@@ -30,6 +30,7 @@ retrieval-pipeline = { path = "../retrieval-pipeline" }
[dev-dependencies]
tower = "0.5"
uuid = { workspace = true }
common = { path = "../common", features = ["test-utils"] }
[[bin]]
name = "server"

View File

@@ -23,4 +23,7 @@ uuid = { workspace = true }
fastembed = { workspace = true }
clap = { version = "4.4", features = ["derive"] }
common = { path = "../common" }
[dev-dependencies]
common = { path = "../common", features = ["test-utils"] }