1 Commits

Author SHA1 Message Date
Per Stark
9a623cbc3f docs: evaluations instructions and readme refactoring 2025-12-22 18:32:59 +01:00
58 changed files with 681 additions and 932 deletions

View File

@@ -1,9 +1,8 @@
# Changelog
## 1.0.0 (2026-01-02)
- **Locally generated embeddings are now the default**. If you want to continue using API embeddings, set EMBEDDING_BACKEND to openai. Using local embeddings will download an ONNX model and recreate all embeddings, but in most instances it is well worth it, since it removes the network-bound call to create embeddings. Creating embeddings on my N100 device is extremely fast. Typically a search response is provided in less than 50ms.
## Unreleased
- Added a benchmarks crate for evaluating the retrieval process
- Added fastembed embedding support, which enables the use of locally generated CPU embeddings and greatly improves latency if the machine can handle it. Quick search has vastly better accuracy and is much faster: 50ms latency in testing, compared to a minimum of 300ms.
- Embeddings stored on own table.
- Embeddings stored on own table
- Refactored retrieval pipeline to use the new, faster and more accurate strategy. Read [blog post](https://blog.stark.pub/posts/eval-retrieval-refactor/) for more details.
## Version 0.2.7 (2025-12-04)

2
Cargo.lock generated
View File

@@ -3820,7 +3820,7 @@ checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30"
[[package]]
name = "main"
version = "1.0.0"
version = "0.2.7"
dependencies = [
"anyhow",
"api-router",

View File

@@ -1,6 +1,6 @@
# Minne
**A graph-powered personal knowledge base that makes storing easy.**
**A graph-powered personal knowledge base that remembers for you.**
Capture content effortlessly, let AI discover connections, and explore your knowledge visually. Self-hosted and privacy-focused.
@@ -8,8 +8,6 @@ Capture content effortlessly, let AI discover connections, and explore your know
[![License: AGPL v3](https://img.shields.io/badge/License-AGPL_v3-blue.svg)](https://www.gnu.org/licenses/agpl-3.0)
[![Latest Release](https://img.shields.io/github/v/release/perstarkse/minne?sort=semver)](https://github.com/perstarkse/minne/releases/latest)
![Screenshot](./screenshot-graph.webp)
## Try It
**[Live Demo](https://minne-demo.stark.pub)** — Read-only demo deployment
@@ -26,20 +24,17 @@ docker compose up -d
# Open http://localhost:3000
```
Or with Nix (with environment variables set):
Or with Nix:
```bash
nix run 'github:perstarkse/minne#main'
```
Pre-built binaries for Windows, macOS, and Linux are available on the [Releases](https://github.com/perstarkse/minne/releases/latest) page.
## Features
- **Fast** — Rust backend with server-side rendering and HTMX for snappy interactions
- **Search & Chat** — Search or use conversational AI to find and reason about content
- **Search & Chat** — Full-text search or conversational AI to find and reason about content
- **Knowledge Graph** — Visual exploration with automatic or manual relationship curation
- **Hybrid Retrieval** — Vector similarity + full-text for relevant results
- **Hybrid Retrieval** — Vector similarity + full-text + graph traversal for relevant results
- **Multi-Format** — Ingest text, URLs, PDFs, audio, and images
- **Self-Hosted** — Your data, your server, any OpenAI-compatible API

View File

@@ -62,36 +62,9 @@ DEFINE TABLE OVERWRITE conversation SCHEMAFULL;
DEFINE TABLE OVERWRITE file SCHEMAFULL;
DEFINE TABLE OVERWRITE knowledge_entity SCHEMAFULL;
DEFINE TABLE OVERWRITE message SCHEMAFULL;
DEFINE TABLE OVERWRITE relates_to SCHEMAFULL TYPE RELATION;
DEFINE FIELD IF NOT EXISTS in ON relates_to TYPE record<knowledge_entity>;
DEFINE FIELD IF NOT EXISTS out ON relates_to TYPE record<knowledge_entity>;
DEFINE FIELD IF NOT EXISTS metadata ON relates_to TYPE object;
DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;
DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;
DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;
DEFINE TABLE OVERWRITE relates_to SCHEMAFULL;
DEFINE TABLE OVERWRITE scratchpad SCHEMAFULL;
DEFINE TABLE OVERWRITE system_settings SCHEMAFULL;
DEFINE TABLE OVERWRITE text_chunk SCHEMAFULL;
-- text_content must have fields defined before enforcing SCHEMAFULL
DEFINE TABLE OVERWRITE text_content SCHEMAFULL;
DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;
DEFINE FIELD IF NOT EXISTS updated_at ON text_content TYPE datetime;
DEFINE FIELD IF NOT EXISTS text ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;
DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;
DEFINE FIELD IF NOT EXISTS url_info.url ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS url_info.title ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS url_info.image_id ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;
DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.id ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.created_at ON text_content TYPE datetime;
DEFINE FIELD IF NOT EXISTS file_info.updated_at ON text_content TYPE datetime;
DEFINE FIELD IF NOT EXISTS file_info.sha256 ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.path ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.file_name ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.mime_type ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.user_id ON text_content TYPE string;
DEFINE TABLE OVERWRITE user SCHEMAFULL;

View File

@@ -1 +0,0 @@
{"schemas":"--- original\n+++ modified\n@@ -242,7 +242,7 @@\n\n # Defines the schema for the 'text_content' table.\n\n-DEFINE TABLE IF NOT EXISTS text_content SCHEMALESS;\n+DEFINE TABLE IF NOT EXISTS text_content SCHEMAFULL;\n\n # Standard fields\n DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;\n@@ -254,10 +254,24 @@\n DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;\n # UrlInfo is a struct, store as object\n DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;\n+DEFINE FIELD IF NOT EXISTS url_info.url ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS url_info.title ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS url_info.image_id ON text_content TYPE string;\n+\n DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;\n DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;\n DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;\n\n+# FileInfo fields\n+DEFINE FIELD IF NOT EXISTS file_info.id ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.created_at ON text_content TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS file_info.updated_at ON text_content TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS file_info.sha256 ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.path ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.file_name ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.mime_type ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.user_id ON text_content TYPE string;\n+\n # Indexes based on query patterns\n DEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;\n","events":null}

View File

@@ -1,6 +1,6 @@
# Defines the schema for the 'text_content' table.
DEFINE TABLE IF NOT EXISTS text_content SCHEMAFULL;
DEFINE TABLE IF NOT EXISTS text_content SCHEMALESS;
# Standard fields
DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;
@@ -12,24 +12,10 @@ DEFINE FIELD IF NOT EXISTS text ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;
# UrlInfo is a struct, store as object
DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;
DEFINE FIELD IF NOT EXISTS url_info.url ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS url_info.title ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS url_info.image_id ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;
DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;
# FileInfo fields
DEFINE FIELD IF NOT EXISTS file_info.id ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.created_at ON text_content TYPE datetime;
DEFINE FIELD IF NOT EXISTS file_info.updated_at ON text_content TYPE datetime;
DEFINE FIELD IF NOT EXISTS file_info.sha256 ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.path ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.file_name ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.mime_type ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS file_info.user_id ON text_content TYPE string;
# Indexes based on query patterns
DEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;
DEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;

View File

@@ -208,26 +208,7 @@ async fn ensure_runtime_indexes_inner(
)
.await
}
HnswIndexState::Matches => {
let status = get_index_status(db, spec.index_name, spec.table).await?;
if status.eq_ignore_ascii_case("error") {
warn!(
index = spec.index_name,
table = spec.table,
"HNSW index found in error state; triggering rebuild"
);
create_index_with_polling(
db,
spec.definition_overwrite(embedding_dimension),
spec.index_name,
spec.table,
Some(spec.table),
)
.await
} else {
Ok(())
}
}
HnswIndexState::Matches => Ok(()),
HnswIndexState::Different(existing) => {
info!(
index = spec.index_name,
@@ -253,30 +234,6 @@ async fn ensure_runtime_indexes_inner(
Ok(())
}
/// Reports the build status of an index as returned by `INFO FOR INDEX`.
///
/// Yields `"unknown"` when the query produces no info value, the string found
/// at `building.status` when that path exists and holds a string, and
/// `"ready"` in every other case.
///
/// # Errors
///
/// Fails if the query cannot be executed or its first result cannot be taken.
async fn get_index_status(db: &SurrealDbClient, index_name: &str, table: &str) -> Result<String> {
    let statement = format!("INFO FOR INDEX {index_name} ON TABLE {table};");
    let mut response = db
        .client
        .query(statement)
        .await
        .context("checking index status")?;

    let maybe_info: Option<Value> = response.take(0).context("failed to take info result")?;
    match maybe_info {
        // No info value came back for this index at all.
        None => Ok("unknown".to_string()),
        Some(info) => {
            // A missing `building` object or `status` entry falls back to "ready".
            let status = info
                .get("building")
                .and_then(|building| building.get("status"))
                .and_then(|raw| raw.as_str())
                .unwrap_or("ready");
            Ok(status.to_string())
        }
    }
}
async fn rebuild_indexes_inner(db: &SurrealDbClient) -> Result<()> {
debug!("Rebuilding indexes with concurrent definitions");
create_fts_analyzer(db).await?;

View File

@@ -3,10 +3,7 @@ use bytes;
use mime_guess::from_path;
use object_store::Error as ObjectStoreError;
use sha2::{Digest, Sha256};
use std::{
io::{BufReader, Read},
path::Path,
};
use std::{io::{BufReader, Read}, path::Path};
use tempfile::NamedTempFile;
use thiserror::Error;
use tokio::task;

View File

@@ -460,11 +460,7 @@ impl KnowledgeEntity {
for (i, entity) in all_entities.iter().enumerate() {
if i > 0 && i % 100 == 0 {
info!(
progress = i,
total = total_entities,
"Re-embedding progress"
);
info!(progress = i, total = total_entities, "Re-embedding progress");
}
let embedding_input = format!(
@@ -489,32 +485,6 @@ impl KnowledgeEntity {
new_embeddings.insert(entity.id.clone(), (embedding, entity.user_id.clone()));
}
info!("Successfully generated all new embeddings.");
info!("Successfully generated all new embeddings.");
// Clear existing embeddings and index first to prevent SurrealDB panics and dimension conflicts.
info!("Removing old index and clearing embeddings...");
// Explicitly remove the index first. This prevents background HNSW maintenance from crashing
// when we delete/replace data, dealing with a known SurrealDB panic.
db.client
.query(format!(
"REMOVE INDEX idx_embedding_knowledge_entity_embedding ON TABLE {};",
KnowledgeEntityEmbedding::table_name()
))
.await
.map_err(AppError::Database)?
.check()
.map_err(AppError::Database)?;
db.client
.query(format!(
"DELETE FROM {};",
KnowledgeEntityEmbedding::table_name()
))
.await
.map_err(AppError::Database)?
.check()
.map_err(AppError::Database)?;
// Perform DB updates in a single transaction
info!("Applying embedding updates in a transaction...");
@@ -530,11 +500,11 @@ impl KnowledgeEntity {
.join(",")
);
transaction_query.push_str(&format!(
"CREATE type::thing('knowledge_entity_embedding', '{id}') SET \
"UPSERT type::thing('knowledge_entity_embedding', '{id}') SET \
entity_id = type::thing('knowledge_entity', '{id}'), \
embedding = {embedding}, \
user_id = '{user_id}', \
created_at = time::now(), \
created_at = IF created_at != NONE THEN created_at ELSE time::now() END, \
updated_at = time::now();",
id = id,
embedding = embedding_str,
@@ -550,12 +520,7 @@ impl KnowledgeEntity {
transaction_query.push_str("COMMIT TRANSACTION;");
// Execute the entire atomic operation
db.client
.query(transaction_query)
.await
.map_err(AppError::Database)?
.check()
.map_err(AppError::Database)?;
db.query(transaction_query).await?;
info!("Re-embedding process for knowledge entities completed successfully.");
Ok(())

View File

@@ -55,7 +55,7 @@ impl KnowledgeRelationship {
relationship_type = self.metadata.relationship_type.as_str()
);
db_client.query(query).await?.check()?;
db_client.query(query).await?;
Ok(())
}
@@ -99,7 +99,9 @@ impl KnowledgeRelationship {
Err(AppError::NotFound(format!("Relationship {id} not found")))
}
} else {
db_client.query(format!("DELETE relates_to:`{id}`")).await?;
db_client
.query(format!("DELETE relates_to:`{id}`"))
.await?;
Ok(())
}
}
@@ -159,7 +161,7 @@ mod tests {
}
#[tokio::test]
async fn test_store_and_verify_by_source_id() {
async fn test_store_relationship() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
@@ -167,10 +169,6 @@ mod tests {
.await
.expect("Failed to start in-memory surrealdb");
db.apply_migrations()
.await
.expect("Failed to apply migrations");
// Create two entities to relate
let entity1_id = create_test_entity("Entity 1", &db).await;
let entity2_id = create_test_entity("Entity 2", &db).await;
@@ -211,7 +209,7 @@ mod tests {
}
#[tokio::test]
async fn test_store_and_delete_relationship() {
async fn test_delete_relationship_by_id() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
@@ -236,7 +234,7 @@ mod tests {
relationship_type,
);
// Store relationship
// Store the relationship
relationship
.store_relationship(&db)
.await
@@ -257,12 +255,12 @@ mod tests {
"Relationship should exist before deletion"
);
// Delete relationship by ID
// Delete the relationship by ID
KnowledgeRelationship::delete_relationship_by_id(&relationship.id, &user_id, &db)
.await
.expect("Failed to delete relationship by ID");
// Query to verify relationship was deleted
// Query to verify the relationship was deleted
let mut result = db
.query(format!(
"SELECT * FROM relates_to WHERE metadata.user_id = '{}' AND metadata.source_id = '{}'",
@@ -272,7 +270,7 @@ mod tests {
.expect("Query failed");
let results: Vec<KnowledgeRelationship> = result.take(0).unwrap_or_default();
// Verify relationship no longer exists
// Verify the relationship no longer exists
assert!(results.is_empty(), "Relationship should be deleted");
}
@@ -344,7 +342,7 @@ mod tests {
}
#[tokio::test]
async fn test_store_relationship_exists() {
async fn test_delete_relationships_by_source_id() {
// Setup in-memory database for testing
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();

View File

@@ -116,7 +116,7 @@ macro_rules! stored_object {
}
$(#[$struct_attr])*
$(#[$struct_attr])*
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct $name {
#[serde(deserialize_with = "deserialize_flexible_id")]

View File

@@ -352,12 +352,12 @@ impl TextChunk {
// Generate all new embeddings in memory
let mut new_embeddings: HashMap<String, (Vec<f32>, String, String)> = HashMap::new();
info!("Generating new embeddings for all chunks...");
for (i, chunk) in all_chunks.iter().enumerate() {
if i > 0 && i % 100 == 0 {
info!(progress = i, total = total_chunks, "Re-embedding progress");
}
let embedding = provider
.embed(&chunk.chunk)
.await
@@ -379,28 +379,6 @@ impl TextChunk {
}
info!("Successfully generated all new embeddings.");
// Clear existing embeddings and index first to prevent SurrealDB panics and dimension conflicts.
info!("Removing old index and clearing embeddings...");
// Explicitly remove the index first. This prevents background HNSW maintenance from crashing
// when we delete/replace data, dealing with a known SurrealDB panic.
db.client
.query(format!(
"REMOVE INDEX idx_embedding_text_chunk_embedding ON TABLE {};",
TextChunkEmbedding::table_name()
))
.await
.map_err(AppError::Database)?
.check()
.map_err(AppError::Database)?;
db.client
.query(format!("DELETE FROM {};", TextChunkEmbedding::table_name()))
.await
.map_err(AppError::Database)?
.check()
.map_err(AppError::Database)?;
// Perform DB updates in a single transaction against the embedding table
info!("Applying embedding updates in a transaction...");
let mut transaction_query = String::from("BEGIN TRANSACTION;");
@@ -416,12 +394,12 @@ impl TextChunk {
);
write!(
&mut transaction_query,
"CREATE type::thing('text_chunk_embedding', '{id}') SET \
"UPSERT type::thing('text_chunk_embedding', '{id}') SET \
chunk_id = type::thing('text_chunk', '{id}'), \
source_id = '{source_id}', \
embedding = {embedding}, \
user_id = '{user_id}', \
created_at = time::now(), \
created_at = IF created_at != NONE THEN created_at ELSE time::now() END, \
updated_at = time::now();",
id = id,
embedding = embedding_str,
@@ -440,12 +418,7 @@ impl TextChunk {
transaction_query.push_str("COMMIT TRANSACTION;");
db.client
.query(transaction_query)
.await
.map_err(AppError::Database)?
.check()
.map_err(AppError::Database)?;
db.query(transaction_query).await?;
info!("Re-embedding process for text chunks completed successfully.");
Ok(())

View File

@@ -250,8 +250,9 @@ impl EmbeddingProvider {
match config.embedding_backend {
EmbeddingBackend::OpenAI => {
let client = openai_client
.ok_or_else(|| anyhow!("OpenAI embedding backend requires an OpenAI client"))?;
let client = openai_client.ok_or_else(|| {
anyhow!("OpenAI embedding backend requires an OpenAI client")
})?;
// Use defaults that match SystemSettings initial values
Self::new_openai(client, "text-embedding-3-small".to_string(), 1536)
}

View File

@@ -12,13 +12,11 @@ include = ["lib"]
# The installers to generate for each app
installers = []
# Target platforms to build apps for (Rust target-triple syntax)
targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"]
targets = ["aarch64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"]
# Skip checking whether the specified configuration files are up to date
allow-dirty = ["ci"]
[dist.github-custom-runners]
aarch64-apple-darwin = "macos-latest"
x86_64-apple-darwin = "macos-15-intel"
x86_64-unknown-linux-gnu = "ubuntu-22.04"
x86_64-unknown-linux-musl = "ubuntu-22.04"
x86_64-pc-windows-msvc = "windows-latest"

View File

@@ -47,7 +47,7 @@ Content In → Ingestion Pipeline → SurrealDB
Query → Retrieval Pipeline → Results
Vector Search + FTS
Vector Search + FTS + Graph
RRF Fusion → (Optional Rerank) → Response
```
@@ -70,5 +70,5 @@ Embeddings are stored in dedicated tables with HNSW indexes for fast vector sear
1. **Collect candidates** — Vector similarity + full-text search
2. **Merge ranks** — Reciprocal Rank Fusion (RRF)
3. **Attach context** — Link chunks to parent entities
4. **Rerank** (optional) — Cross-encoder reranking
4. **Rerank** (optional) — Cross-encoder rescoring
5. **Return** — Top-k results with metadata

View File

@@ -13,7 +13,6 @@ Minne can be configured via environment variables or a `config.yaml` file. Envir
| `SURREALDB_DATABASE` | Database name | `minne_db` |
| `SURREALDB_NAMESPACE` | Namespace | `minne_ns` |
## Optional Settings
| Variable | Description | Default |
@@ -22,20 +21,14 @@ Minne can be configured via environment variables or a `config.yaml` file. Envir
| `DATA_DIR` | Local data directory | `./data` |
| `OPENAI_BASE_URL` | Custom AI provider URL | OpenAI default |
| `RUST_LOG` | Logging level | `info` |
| `STORAGE` | Storage backend (`local`, `memory`) | `local` |
| `PDF_INGEST_MODE` | PDF ingestion strategy (`classic`, `llm-first`) | `llm-first` |
| `RETRIEVAL_STRATEGY` | Default retrieval strategy | - |
| `EMBEDDING_BACKEND` | Embedding provider (`openai`, `fastembed`) | `fastembed` |
| `FASTEMBED_CACHE_DIR` | Model cache directory | `<data_dir>/fastembed` |
| `FASTEMBED_SHOW_DOWNLOAD_PROGRESS` | Show progress bar for model downloads | `false` |
| `FASTEMBED_MAX_LENGTH` | Max sequence length for FastEmbed models | - |
### Reranking (Optional)
| Variable | Description | Default |
|----------|-------------|---------|
| `RERANKING_ENABLED` | Enable FastEmbed reranking | `false` |
| `RERANKING_POOL_SIZE` | Concurrent reranker workers | - |
| `RERANKING_POOL_SIZE` | Concurrent reranker workers | `2` |
| `FASTEMBED_CACHE_DIR` | Model cache directory | `<data_dir>/fastembed/reranker` |
> [!NOTE]
> Enabling reranking downloads ~1.1 GB of model data on first startup.
@@ -52,11 +45,6 @@ openai_api_key: "sk-your-key-here"
data_dir: "./minne_data"
http_port: 3000
# New settings
storage: "local"
pdf_ingest_mode: "llm-first"
embedding_backend: "fastembed"
# Optional reranking
reranking_enabled: true
reranking_pool_size: 2

View File

@@ -893,6 +893,158 @@ mod tests {
}
}
// Seeds the manifest from `build_manifest()` into a fresh in-memory database
// and checks that exactly one row ends up in each of the six tables queried
// below (text content, entities, chunks, relationships, and both embedding
// tables).
#[tokio::test]
async fn seeds_manifest_with_transactional_batches() {
// A random database name keeps this test's in-memory data separate per run.
let namespace = "test_ns";
let database = Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, &database)
.await
.expect("memory db");
// Migrations are applied before seeding.
db.apply_migrations()
.await
.expect("apply migrations for memory db");
let manifest = build_manifest();
seed_manifest_into_db(&db, &manifest)
.await
.expect("manifest seed should succeed");
// Each query below reads a whole table. `unwrap_or_default()` turns a failed
// `take(0)` into an empty Vec, so such a failure surfaces as a length
// mismatch in the following assertion rather than as a panic here.
let text_contents: Vec<TextContent> = db
.client
.query(format!("SELECT * FROM {};", TextContent::table_name()))
.await
.expect("select text_content")
.take(0)
.unwrap_or_default();
assert_eq!(text_contents.len(), 1);
let entities: Vec<KnowledgeEntity> = db
.client
.query(format!("SELECT * FROM {};", KnowledgeEntity::table_name()))
.await
.expect("select knowledge_entity")
.take(0)
.unwrap_or_default();
assert_eq!(entities.len(), 1);
let chunks: Vec<TextChunk> = db
.client
.query(format!("SELECT * FROM {};", TextChunk::table_name()))
.await
.expect("select text_chunk")
.take(0)
.unwrap_or_default();
assert_eq!(chunks.len(), 1);
// The relationship table name is written literally here instead of going
// through a `table_name()` helper.
let relationships: Vec<KnowledgeRelationship> = db
.client
.query("SELECT * FROM relates_to;")
.await
.expect("select relates_to")
.take(0)
.unwrap_or_default();
assert_eq!(relationships.len(), 1);
// Embeddings are read from their own tables, separate from the entity and
// chunk tables queried above.
let entity_embeddings: Vec<KnowledgeEntityEmbedding> = db
.client
.query(format!(
"SELECT * FROM {};",
KnowledgeEntityEmbedding::table_name()
))
.await
.expect("select knowledge_entity_embedding")
.take(0)
.unwrap_or_default();
assert_eq!(entity_embeddings.len(), 1);
let chunk_embeddings: Vec<TextChunkEmbedding> = db
.client
.query(format!(
"SELECT * FROM {};",
TextChunkEmbedding::table_name()
))
.await
.expect("select text_chunk_embedding")
.take(0)
.unwrap_or_default();
assert_eq!(chunk_embeddings.len(), 1);
}
// Seeds the manifest from `build_manifest()` into a migrated in-memory
// database and checks that seeding succeeds and leaves exactly one row in
// each of the six tables queried below.
//
// NOTE(review): despite the "rolls_back" name, this test asserts that seeding
// SUCCEEDS and that all rows are present; no rollback is checked. The name
// looks like a leftover from earlier behavior — confirm and consider renaming.
#[tokio::test]
async fn rolls_back_when_embeddings_mismatch_index_dimension() {
let namespace = "test_ns_rollback";
let database = Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, &database)
.await
.expect("memory db");
db.apply_migrations()
.await
.expect("apply migrations for memory db");
let manifest = build_manifest();
// The result is kept (not unwrapped) so the assertion can carry a message
// explaining the expectation.
let result = seed_manifest_into_db(&db, &manifest).await;
assert!(
result.is_ok(),
"seeding should succeed even if embedding dimensions differ from default index"
);
// All tables are read first and the counts are asserted together at the end.
// `unwrap_or_default()` turns a failed `take(0)` into an empty Vec, so such
// a failure shows up as a count mismatch below.
let text_contents: Vec<TextContent> = db
.client
.query(format!("SELECT * FROM {};", TextContent::table_name()))
.await
.expect("select text_content")
.take(0)
.unwrap_or_default();
let entities: Vec<KnowledgeEntity> = db
.client
.query(format!("SELECT * FROM {};", KnowledgeEntity::table_name()))
.await
.expect("select knowledge_entity")
.take(0)
.unwrap_or_default();
let chunks: Vec<TextChunk> = db
.client
.query(format!("SELECT * FROM {};", TextChunk::table_name()))
.await
.expect("select text_chunk")
.take(0)
.unwrap_or_default();
let relationships: Vec<KnowledgeRelationship> = db
.client
.query("SELECT * FROM relates_to;")
.await
.expect("select relates_to")
.take(0)
.unwrap_or_default();
let entity_embeddings: Vec<KnowledgeEntityEmbedding> = db
.client
.query(format!(
"SELECT * FROM {};",
KnowledgeEntityEmbedding::table_name()
))
.await
.expect("select knowledge_entity_embedding")
.take(0)
.unwrap_or_default();
let chunk_embeddings: Vec<TextChunkEmbedding> = db
.client
.query(format!(
"SELECT * FROM {};",
TextChunkEmbedding::table_name()
))
.await
.expect("select text_chunk_embedding")
.take(0)
.unwrap_or_default();
// One row per table is expected after a successful seed.
assert_eq!(text_contents.len(), 1);
assert_eq!(entities.len(), 1);
assert_eq!(chunks.len(), 1);
assert_eq!(relationships.len(), 1);
assert_eq!(entity_embeddings.len(), 1);
assert_eq!(chunk_embeddings.len(), 1);
}
#[test]
fn window_manifest_trims_questions_and_negatives() {
let manifest = build_manifest();

View File

@@ -7,7 +7,7 @@ use std::{
use anyhow::{anyhow, Context, Result};
use common::storage::{db::SurrealDbClient, types::text_chunk::TextChunk};
use crate::{args::Config, corpus, eval::connect_eval_db, snapshot::DbSnapshotState};
use crate::{args::Config, eval::connect_eval_db, corpus, snapshot::DbSnapshotState};
pub async fn inspect_question(config: &Config) -> Result<()> {
let question_id = config

View File

@@ -20,10 +20,9 @@ use retrieval_pipeline::{
use crate::{
args::Config,
cache::EmbeddingCache,
corpus,
datasets::ConvertedDataset,
eval::{CaseDiagnostics, CaseSummary, EvaluationStageTimings, EvaluationSummary, SeededCase},
slice, snapshot,
corpus, slice, snapshot,
};
pub(super) struct EvaluationContext<'a> {

View File

@@ -3,7 +3,7 @@ use std::time::Instant;
use anyhow::Context;
use tracing::info;
use crate::{corpus, eval::can_reuse_namespace, slice, snapshot};
use crate::{eval::can_reuse_namespace, corpus, slice, snapshot};
use super::super::{
context::{EvalStage, EvaluationContext},

View File

@@ -5,12 +5,12 @@ use common::storage::types::system_settings::SystemSettings;
use tracing::{info, warn};
use crate::{
corpus,
db_helpers::{recreate_indexes, remove_all_indexes, reset_namespace},
eval::{
can_reuse_namespace, cases_from_manifest, enforce_system_settings, ensure_eval_user,
record_namespace_state, warm_hnsw_cache,
},
corpus,
};
use super::super::{

View File

@@ -48,9 +48,7 @@ pub(crate) async fn prepare_slice(
.database
.db_namespace
.clone()
.unwrap_or_else(|| {
default_namespace(ctx.dataset().metadata.id.as_str(), ctx.config().limit)
});
.unwrap_or_else(|| default_namespace(ctx.dataset().metadata.id.as_str(), ctx.config().limit));
ctx.database = ctx
.config()
.database

View File

@@ -608,7 +608,7 @@
line-height: inherit;
}
.markdown-content :not(pre)>code {
.markdown-content :not(pre) > code {
background-color: rgba(0, 0, 0, 0.05);
color: var(--color-base-content);
padding: 0.15em 0.4em;
@@ -662,7 +662,7 @@
color: var(--color-base-content);
}
[data-theme="dark"] .markdown-content :not(pre)>code {
[data-theme="dark"] .markdown-content :not(pre) > code {
background-color: rgba(255, 255, 255, 0.12);
color: var(--color-base-content);
}
@@ -677,136 +677,6 @@
z-index: 9999;
box-shadow: var(--nb-shadow);
}
/* .nb-label: Uppercase, bold, tracking-wide, text-xs for section headers */
.nb-label {
@apply uppercase font-bold tracking-wide text-xs;
}
/* .nb-data: JetBrains Mono, tabular-nums for timestamps, IDs, badges */
.nb-data {
font-family: 'JetBrains Mono', ui-monospace, SFMono-Regular, monospace;
font-variant-numeric: tabular-nums;
}
/* The Stamp: Button :active state pushes into page */
.nb-btn:active {
transform: translate(2px, 2px) !important;
box-shadow: 2px 2px 0 0 #000 !important;
}
/* Staggered Card Dealing Animation */
@keyframes deal-in {
0% {
opacity: 0;
transform: translateY(12px);
}
100% {
opacity: 1;
transform: translateY(0);
}
}
/* Staggered deal-in animation - STRICTLY SCOPED to main content area */
main .nb-card,
main .nb-panel {
animation: deal-in 300ms var(--ease-mechanical, cubic-bezier(0.25, 1, 0.5, 1)) backwards;
}
/* Exclude elements that shouldn't animate even inside main */
main nav.nb-panel,
main .no-animation {
animation: none;
}
/* Apply staggered delays only to direct children of grids/lists or top-level containers */
main .nb-masonry>.nb-card:nth-child(1),
main .grid>.nb-panel:nth-child(1) {
animation-delay: 0ms;
}
main .nb-masonry>.nb-card:nth-child(2),
main .grid>.nb-panel:nth-child(2) {
animation-delay: 50ms;
}
main .nb-masonry>.nb-card:nth-child(3),
main .grid>.nb-panel:nth-child(3) {
animation-delay: 100ms;
}
main .nb-masonry>.nb-card:nth-child(4),
main .grid>.nb-panel:nth-child(4) {
animation-delay: 150ms;
}
main .nb-masonry>.nb-card:nth-child(5),
main .grid>.nb-panel:nth-child(5) {
animation-delay: 200ms;
}
main .nb-masonry>.nb-card:nth-child(6),
main .grid>.nb-panel:nth-child(6) {
animation-delay: 250ms;
}
main .nb-masonry>.nb-card:nth-child(7),
main .grid>.nb-panel:nth-child(7) {
animation-delay: 300ms;
}
main .nb-masonry>.nb-card:nth-child(8),
main .grid>.nb-panel:nth-child(8) {
animation-delay: 350ms;
}
main .nb-masonry>.nb-card:nth-child(n+9),
main .grid>.nb-panel:nth-child(n+9) {
animation-delay: 400ms;
}
/* HTMX Swap Fade-Up Animation */
@keyframes fade-up {
0% {
opacity: 0;
transform: translateY(8px);
}
100% {
opacity: 1;
transform: translateY(0);
}
}
.animate-fade-up {
animation: fade-up 200ms var(--ease-mechanical, cubic-bezier(0.25, 1, 0.5, 1)) forwards;
}
/* Kinetic Input: Chat Armed State */
#chat-input:not(:placeholder-shown)~button {
filter: saturate(1.3) brightness(1.1);
}
#chat-input:not(:placeholder-shown) {
border-color: var(--color-accent);
}
/* Evidence Frame for images (Tufte treatment) */
.nb-evidence-frame {
@apply border-2 border-neutral m-2 bg-base-200;
}
.nb-evidence-frame img {
display: block;
width: 100%;
height: auto;
}
.nb-evidence-frame figcaption {
@apply text-xs px-2 py-1 border-t-2 border-neutral;
font-family: 'JetBrains Mono', ui-monospace, monospace;
}
}
/* Theme-aware placeholder contrast tweaks */
@@ -821,31 +691,6 @@
color: rgba(255, 255, 255, 0.78) !important;
opacity: 0.85;
}
/* === DESIGN POLISHING: Receding Reality === */
/* Modal opens → background scales and blurs */
body:has(dialog[open]) #main-content-wrapper,
body.modal-open #main-content-wrapper {
transform: scale(0.98);
filter: blur(2px);
transition: transform 250ms var(--ease-mechanical, cubic-bezier(0.25, 1, 0.5, 1)),
filter 250ms var(--ease-mechanical, cubic-bezier(0.25, 1, 0.5, 1));
}
#main-content-wrapper {
transform: scale(1);
filter: blur(0);
transition: transform 250ms var(--ease-mechanical, cubic-bezier(0.25, 1, 0.5, 1)),
filter 250ms var(--ease-mechanical, cubic-bezier(0.25, 1, 0.5, 1));
}
/* === DESIGN POLISHING: Scroll-Linked Navbar Shadow === */
nav {
--scroll-depth: 0;
box-shadow: 4px calc(4px + var(--scroll-depth) * 4px) 0 0 #000;
transition: box-shadow 150ms ease;
}
}
/* satoshi.css */
@@ -869,15 +714,6 @@
font-display: swap;
}
@font-face {
font-family: 'JetBrains Mono';
src: url('fonts/JetBrainsMono-Regular.woff2') format('woff2'),
url('fonts/JetBrainsMono-Variable.ttf') format('truetype');
font-weight: 400 700;
font-style: normal;
font-display: swap;
}
/* Minimal override: prevent DaisyUI .menu hover bg on our nb buttons */
@layer utilities {
@@ -900,4 +736,4 @@
.toast-alert-title {
@apply text-lg font-bold;
}
}
}

View File

@@ -1,199 +0,0 @@
/**
* Design Polishing Pass - Interactive Effects
*
* Includes:
* - Scroll-Linked Navbar Shadow
* - HTMX Swap Animation
* - Typewriter AI Response
* - Rubberbanding Scroll
*/
(function() {
'use strict';
// === SCROLL-LINKED NAVBAR SHADOW ===
/**
 * Publishes the scroll position of the first <main> element to the first
 * <nav> element as the `--scroll-depth` CSS custom property.
 *
 * The value is a two-decimal string in the range 0..1: it grows linearly
 * with `scrollTop` and saturates once 200px have been scrolled. When <main>
 * has nothing to scroll the value is 0. Does nothing if either element is
 * missing.
 */
function initScrollShadow() {
  const scroller = document.querySelector('main');
  const nav = document.querySelector('nav');
  if (scroller === null || nav === null) {
    return;
  }

  const publishDepth = () => {
    const scrolled = scroller.scrollTop;
    const scrollableRange = scroller.scrollHeight - scroller.clientHeight;

    // Only report a depth when there is actually overflow to scroll through.
    let depth = 0;
    if (scrollableRange > 0) {
      depth = Math.min(scrolled / 200, 1);
    }

    nav.style.setProperty('--scroll-depth', depth.toFixed(2));
  };

  scroller.addEventListener('scroll', publishDepth, { passive: true });
}
// === HTMX SWAP ANIMATION ===
/**
 * Plays a short fade-up animation on content swapped in by HTMX.
 *
 * Listens for `htmx:afterSwap` on <body>. A full-body swap is redirected to
 * the <main> element (when one exists) so only the main content animates.
 * Targets outside <main> are left alone, which keeps sidebar and navbar
 * updates from animating.
 */
function initHtmxSwapAnimation() {
  const FADE_CLASS = 'animate-fade-up';
  const FADE_DURATION_MS = 250;

  // For a whole-body swap, animate <main> instead of the body itself.
  function resolveAnimatedElement(swapped) {
    if (swapped.tagName !== 'BODY') {
      return swapped;
    }
    return document.querySelector('main') || swapped;
  }

  document.body.addEventListener('htmx:afterSwap', (event) => {
    const swapped = event.detail.target;
    if (!swapped) return;

    const animated = resolveAnimatedElement(swapped);
    const belongsToMain = animated.tagName === 'MAIN' || animated.closest('main');
    if (!belongsToMain) return;

    // An animation is already in flight for this element; don't restart it.
    if (animated.classList.contains(FADE_CLASS)) return;

    animated.classList.add(FADE_CLASS);
    // Drop the class once the animation is over so the next swap can replay it.
    setTimeout(() => animated.classList.remove(FADE_CLASS), FADE_DURATION_MS);
  });
}
// === TYPEWRITER AI RESPONSE ===
// Works with SSE streaming - buffers text and reveals character by character
/**
 * Creates a typewriter writer for `element`: text handed to `append()` is
 * buffered and revealed one character at a time with a small random delay.
 *
 * @param {Element} element  Container that receives the revealed text.
 * @param {Object}  [options]
 * @param {number}  [options.minDelay=5]     Lower bound of the per-character delay (ms).
 * @param {number}  [options.maxDelay=15]    Upper bound of the per-character delay (ms).
 * @param {boolean} [options.showCursor=true] Append a blinking cursor span to `element`.
 * @returns {{append: function(string): void, complete: function(): void}}
 *   `append(text)` queues more text; `complete()` flushes whatever is still
 *   buffered immediately and removes the cursor.
 */
window.initTypewriter = function(element, options = {}) {
  const {
    minDelay = 5,
    maxDelay = 15,
    showCursor = true
  } = options;

  let buffer = '';
  let isTyping = false;
  let cursorElement = null;
  // Handle of the scheduled typeNextChar call, so complete() can cancel it.
  let pendingTimer = null;

  if (showCursor) {
    cursorElement = document.createElement('span');
    cursorElement.className = 'typewriter-cursor';
    cursorElement.textContent = '▌';
    cursorElement.style.animation = 'blink 1s step-end infinite';
    element.appendChild(cursorElement);
  }

  // Writes `text` just before the cursor, or at the end of the element when
  // the cursor is absent or has been detached.
  function insertText(text) {
    if (cursorElement && cursorElement.parentNode) {
      element.insertBefore(document.createTextNode(text), cursorElement);
    } else {
      element.textContent += text;
    }
  }

  function typeNextChar() {
    pendingTimer = null;
    if (buffer.length === 0) {
      isTyping = false;
      return;
    }
    isTyping = true;

    // Take one full code point, not one UTF-16 unit: charAt(0)/slice(1) would
    // split characters outside the BMP (e.g. emoji) into two lone surrogates.
    const char = String.fromCodePoint(buffer.codePointAt(0));
    buffer = buffer.slice(char.length);
    insertText(char);

    const delay = minDelay + Math.random() * (maxDelay - minDelay);
    pendingTimer = setTimeout(typeNextChar, delay);
  }

  return {
    append: function(text) {
      buffer += text;
      if (!isTyping) {
        typeNextChar();
      }
    },
    complete: function() {
      // Cancel the scheduled step; otherwise an append() issued right after
      // complete() would start a second typing chain next to the old one.
      if (pendingTimer !== null) {
        clearTimeout(pendingTimer);
        pendingTimer = null;
      }

      // Flush remaining buffer immediately
      const cursorAttached = Boolean(cursorElement && cursorElement.parentNode);
      insertText(buffer);
      if (cursorAttached) {
        cursorElement.remove();
      }
      buffer = '';
      isTyping = false;
    }
  };
};
// === RUBBERBANDING SCROLL ===
/**
 * Adds a touch "rubber band" effect to `#chat-scroll-container` and every
 * `.content-scroll-container`: dragging past the top or bottom edge
 * translates the container by a damped amount (at most 60px), and releasing
 * animates it back to its resting position.
 *
 * Safe to call repeatedly. This function is invoked again after HTMX settles,
 * so containers that already have their listeners are skipped instead of
 * being bound a second time (which would stack duplicate handlers on
 * elements that survived the swap).
 */
function initRubberbanding() {
  const containers = document.querySelectorAll('#chat-scroll-container, .content-scroll-container');

  containers.forEach(container => {
    // Idempotency guard: bind each element at most once.
    if (container.dataset.rubberbandBound === 'true') return;
    container.dataset.rubberbandBound = 'true';

    let startY = 0;
    let pulling = false;
    let pullDistance = 0;
    const maxPull = 60;      // hard cap on the visual offset, in px
    const resistance = 0.4;  // fraction of finger travel that turns into offset

    container.addEventListener('touchstart', (e) => {
      startY = e.touches[0].clientY;
    }, { passive: true });

    container.addEventListener('touchmove', (e) => {
      const currentY = e.touches[0].clientY;
      const diff = currentY - startY;

      // At top boundary, pulling down
      if (container.scrollTop <= 0 && diff > 0) {
        pulling = true;
        pullDistance = Math.min(diff * resistance, maxPull);
        container.style.transform = `translateY(${pullDistance}px)`;
      }
      // At bottom boundary, pulling up
      else if (container.scrollTop + container.clientHeight >= container.scrollHeight && diff < 0) {
        pulling = true;
        pullDistance = Math.max(diff * resistance, -maxPull);
        container.style.transform = `translateY(${pullDistance}px)`;
      }
    }, { passive: true });

    // Snap back to rest with a short transition, then clear the inline
    // transition so later transforms are not animated unintentionally.
    const release = () => {
      if (pulling) {
        container.style.transition = 'transform 300ms cubic-bezier(0.25, 1, 0.5, 1)';
        container.style.transform = 'translateY(0)';
        setTimeout(() => {
          container.style.transition = '';
        }, 300);
        pulling = false;
        pullDistance = 0;
      }
    };

    container.addEventListener('touchend', release, { passive: true });
    // A cancelled gesture never fires touchend; release here too so the
    // container is not left stuck in its translated position.
    container.addEventListener('touchcancel', release, { passive: true });
  });
}
// === INITIALIZATION ===
// Wires up the effects that attach DOM listeners: the scroll-linked navbar
// shadow, the HTMX swap animation and touch rubberbanding.
function init() {
initScrollShadow();
initHtmxSwapAnimation();
initRubberbanding();
}
// Run on DOMContentLoaded
// Initialise now if the document is already parsed, otherwise wait for it.
const domStillLoading = document.readyState === 'loading';
if (domStillLoading) {
  document.addEventListener('DOMContentLoaded', init);
} else {
  init();
}

// HTMX navigations can bring in new scroll containers; bind them once settled.
document.body.addEventListener('htmx:afterSettle', () => initRubberbanding());

// Inject the keyframes and cursor styling used by the typewriter cursor.
const typewriterStyles = document.createElement('style');
typewriterStyles.textContent = `
@keyframes blink {
0%, 100% { opacity: 1; }
50% { opacity: 0; }
}
.typewriter-cursor {
color: var(--color-accent);
font-weight: bold;
}
`;
document.head.appendChild(typewriterStyles);
})();

View File

@@ -0,0 +1,144 @@
//==========================================================
// head-support.js
//
// An extension to add head tag merging.
//==========================================================
(function(){
var api = null;
// Debug hook used by mergeHead. Intentionally a no-op: the console call below
// is commented out, so callers' arguments are ignored unless it is re-enabled.
function log() {
//console.log(arguments);
}
/**
 * Merges the <head> found in `newContent` into the live document head.
 *
 * Elements are compared by their `outerHTML`:
 *   - present in both heads (or marked hx-preserve="true"): kept, unless the
 *     current element carries hx-head="re-eval", in which case it is removed
 *     so the incoming copy replaces it;
 *   - only in the current head: with the "append" strategy it is left alone
 *     (or removed and re-appended when marked re-eval); otherwise it is removed;
 *   - only in the new head: appended.
 *
 * Fires `htmx:removingHeadElement` / `htmx:addingHeadElement` (either can veto
 * by returning false) and finally `htmx:afterHeadMerge`.
 *
 * @param {string} newContent            Markup that may contain a <head> element.
 * @param {string} defaultMergeStrategy  Strategy used when the new head has no
 *                                       `hx-head` attribute; "append" is handled
 *                                       specially, any other value merges.
 */
function mergeHead(newContent, defaultMergeStrategy) {

    if (newContent && newContent.indexOf('<head') > -1) {
        const htmlDoc = document.createElement("html");
        // remove svgs to avoid conflicts
        var contentWithSvgsRemoved = newContent.replace(/<svg(\s[^>]*>|>)([\s\S]*?)<\/svg>/gim, '');
        // extract head tag
        var headTag = contentWithSvgsRemoved.match(/(<head(\s[^>]*>|>)([\s\S]*?)<\/head>)/im);

        // if the head tag exists...
        if (headTag) {

            var added = []
            var removed = []
            var preserved = []
            var nodesToAppend = []

            // Use the full match only. Assigning the match array itself would
            // coerce it to a comma-joined string of the match plus every
            // capture group, i.e. the head markup followed by stray fragments.
            htmlDoc.innerHTML = headTag[0];
            var newHeadTag = htmlDoc.querySelector("head");
            var currentHead = document.head;

            if (newHeadTag == null) {
                return;
            } else {
                // put all new head elements into a Map, by their outerHTML
                var srcToNewHeadNodes = new Map();
                for (const newHeadChild of newHeadTag.children) {
                    srcToNewHeadNodes.set(newHeadChild.outerHTML, newHeadChild);
                }
            }

            // determine merge strategy
            var mergeStrategy = api.getAttributeValue(newHeadTag, "hx-head") || defaultMergeStrategy;

            // get the current head
            for (const currentHeadElt of currentHead.children) {

                // If the current head element is in the map
                var inNewContent = srcToNewHeadNodes.has(currentHeadElt.outerHTML);
                var isReAppended = currentHeadElt.getAttribute("hx-head") === "re-eval";
                var isPreserved = api.getAttributeValue(currentHeadElt, "hx-preserve") === "true";
                if (inNewContent || isPreserved) {
                    if (isReAppended) {
                        // remove the current version and let the new version replace it and re-execute
                        removed.push(currentHeadElt);
                    } else {
                        // this element already exists and should not be re-appended, so remove it from
                        // the new content map, preserving it in the DOM
                        srcToNewHeadNodes.delete(currentHeadElt.outerHTML);
                        preserved.push(currentHeadElt);
                    }
                } else {
                    if (mergeStrategy === "append") {
                        // we are appending and this existing element is not new content
                        // so if and only if it is marked for re-append do we do anything
                        if (isReAppended) {
                            removed.push(currentHeadElt);
                            nodesToAppend.push(currentHeadElt);
                        }
                    } else {
                        // if this is a merge, we remove this content since it is not in the new head
                        // NOTE(review): the same event is fired again for this element in the
                        // removal loop below, so listeners see it twice — confirm this is intended.
                        if (api.triggerEvent(document.body, "htmx:removingHeadElement", {headElement: currentHeadElt}) !== false) {
                            removed.push(currentHeadElt);
                        }
                    }
                }
            }

            // Push the remaining new head elements in the Map into the
            // nodes to append to the head tag
            nodesToAppend.push(...srcToNewHeadNodes.values());
            log("to append: ", nodesToAppend);

            for (const newNode of nodesToAppend) {
                log("adding: ", newNode);
                // Re-create the node from its markup instead of moving the parsed one.
                var newElt = document.createRange().createContextualFragment(newNode.outerHTML);
                log(newElt);
                if (api.triggerEvent(document.body, "htmx:addingHeadElement", {headElement: newElt}) !== false) {
                    currentHead.appendChild(newElt);
                    added.push(newElt);
                }
            }

            // remove all removed elements, after we have appended the new elements to avoid
            // additional network requests for things like style sheets
            for (const removedElement of removed) {
                if (api.triggerEvent(document.body, "htmx:removingHeadElement", {headElement: removedElement}) !== false) {
                    currentHead.removeChild(removedElement);
                }
            }

            api.triggerEvent(document.body, "htmx:afterHeadMerge", {added: added, kept: preserved, removed: removed});
        }
    }
}
// Registers the "head-support" extension. On init it keeps a reference to the
// htmx internal API and installs three listeners:
//   - htmx:afterSwap:          merge the head of the XHR response (boosted
//                              requests default to "merge", others to "append");
//   - htmx:historyRestore:     merge the head from the server response on a
//                              cache miss, otherwise from the saved history item;
//   - htmx:historyItemCreated: snapshot the current head onto the history item.
// Both merge paths can be vetoed through the htmx:beforeHeadMerge event.
htmx.defineExtension("head-support", {
    init: function(apiRef) {
        // store a reference to the internal API.
        api = apiRef;

        htmx.on('htmx:afterSwap', function(evt) {
            const xhr = evt.detail.xhr;
            if (!xhr) {
                return;
            }
            const responseBody = xhr.response;
            if (!api.triggerEvent(document.body, "htmx:beforeHeadMerge", evt.detail)) {
                return;
            }
            const strategy = evt.detail.boosted ? "merge" : "append";
            mergeHead(responseBody, strategy);
        });

        htmx.on('htmx:historyRestore', function(evt) {
            if (!api.triggerEvent(document.body, "htmx:beforeHeadMerge", evt.detail)) {
                return;
            }
            const detail = evt.detail;
            const headSource = detail.cacheMiss ? detail.serverResponse : detail.item.head;
            mergeHead(headSource, "merge");
        });

        htmx.on('htmx:historyItemCreated', function(evt) {
            evt.detail.item.head = document.head.outerHTML;
        });
    }
});
})()

File diff suppressed because one or more lines are too long

View File

@@ -41,7 +41,6 @@ pub struct ScratchpadDetailData {
user: User,
scratchpad: ScratchpadDetail,
conversation_archive: Vec<Conversation>,
is_editing_title: bool,
}
#[derive(Serialize)]
@@ -181,8 +180,8 @@ pub async fn show_scratchpad_modal(
let scratchpad_detail = ScratchpadDetail::from(&scratchpad);
// Handle edit_title query parameter
let is_editing_title = query.edit_title.unwrap_or(false);
// Handle edit_title query parameter if needed in future
let _ = query.edit_title.unwrap_or(false);
Ok(TemplateResponse::new_template(
"scratchpad/editor_modal.html",
@@ -190,7 +189,6 @@ pub async fn show_scratchpad_modal(
user,
scratchpad: scratchpad_detail,
conversation_archive,
is_editing_title,
},
))
}
@@ -265,7 +263,6 @@ pub async fn update_scratchpad_title(
user,
scratchpad: ScratchpadDetail::from(&scratchpad),
conversation_archive,
is_editing_title: false,
},
))
}

View File

@@ -1,7 +1,6 @@
use std::{
collections::{HashMap, HashSet},
fmt,
str::FromStr,
fmt, str::FromStr,
};
use axum::{
@@ -199,60 +198,59 @@ pub async fn search_result_handler(
}
let conversation_archive = User::get_user_conversations(&user.id, &state.db).await?;
let (search_results_for_template, final_query_param_for_template) = if let Some(actual_query) =
params.query
{
let trimmed_query = actual_query.trim();
if trimmed_query.is_empty() {
(Vec::<SearchResultForTemplate>::new(), String::new())
} else {
// Use retrieval pipeline Search strategy
let config = RetrievalConfig::for_search(SearchTarget::Both);
// Checkout a reranker lease if pool is available
let reranker_lease = match &state.reranker_pool {
Some(pool) => Some(pool.checkout().await),
None => None,
};
let result = retrieval_pipeline::pipeline::run_pipeline(
&state.db,
&state.openai_client,
Some(&state.embedding_provider),
trimmed_query,
&user.id,
config,
reranker_lease,
)
.await?;
let search_result = match result {
StrategyOutput::Search(sr) => sr,
_ => SearchResult::new(vec![], vec![]),
};
let mut source_ids = HashSet::new();
for chunk_result in &search_result.chunks {
source_ids.insert(chunk_result.chunk.source_id.clone());
}
for entity_result in &search_result.entities {
source_ids.insert(entity_result.entity.source_id.clone());
}
let source_label_map = if source_ids.is_empty() {
HashMap::new()
let (search_results_for_template, final_query_param_for_template) =
if let Some(actual_query) = params.query {
let trimmed_query = actual_query.trim();
if trimmed_query.is_empty() {
(Vec::<SearchResultForTemplate>::new(), String::new())
} else {
let record_ids: Vec<RecordId> = source_ids
.iter()
.filter_map(|id| {
if id.contains(':') {
RecordId::from_str(id).ok()
} else {
Some(RecordId::from_table_key(TextContent::table_name(), id))
}
})
.collect();
let mut response = state
// Use retrieval pipeline Search strategy
let config = RetrievalConfig::for_search(SearchTarget::Both);
// Checkout a reranker lease if pool is available
let reranker_lease = match &state.reranker_pool {
Some(pool) => Some(pool.checkout().await),
None => None,
};
let result = retrieval_pipeline::pipeline::run_pipeline(
&state.db,
&state.openai_client,
Some(&state.embedding_provider),
trimmed_query,
&user.id,
config,
reranker_lease,
)
.await?;
let search_result = match result {
StrategyOutput::Search(sr) => sr,
_ => SearchResult::new(vec![], vec![]),
};
let mut source_ids = HashSet::new();
for chunk_result in &search_result.chunks {
source_ids.insert(chunk_result.chunk.source_id.clone());
}
for entity_result in &search_result.entities {
source_ids.insert(entity_result.entity.source_id.clone());
}
let source_label_map = if source_ids.is_empty() {
HashMap::new()
} else {
let record_ids: Vec<RecordId> = source_ids
.iter()
.filter_map(|id| {
if id.contains(':') {
RecordId::from_str(id).ok()
} else {
Some(RecordId::from_table_key(TextContent::table_name(), id))
}
})
.collect();
let mut response = state
.db
.client
.query(
@@ -262,84 +260,84 @@ pub async fn search_result_handler(
.bind(("user_id", user.id.clone()))
.bind(("record_ids", record_ids))
.await?;
let contents: Vec<SourceLabelRow> = response.take(0)?;
let contents: Vec<SourceLabelRow> = response.take(0)?;
tracing::debug!(
source_id_count = source_ids.len(),
label_row_count = contents.len(),
"Resolved search source labels"
);
let mut labels = HashMap::new();
for content in contents {
let label = build_source_label(&content);
labels.insert(content.id.clone(), label.clone());
labels.insert(
format!("{}:{}", TextContent::table_name(), content.id),
label,
tracing::debug!(
source_id_count = source_ids.len(),
label_row_count = contents.len(),
"Resolved search source labels"
);
let mut labels = HashMap::new();
for content in contents {
let label = build_source_label(&content);
labels.insert(content.id.clone(), label.clone());
labels.insert(
format!("{}:{}", TextContent::table_name(), content.id),
label,
);
}
labels
};
let mut combined_results: Vec<SearchResultForTemplate> =
Vec::with_capacity(search_result.chunks.len() + search_result.entities.len());
// Add chunk results
for chunk_result in search_result.chunks {
let source_label = source_label_map
.get(&chunk_result.chunk.source_id)
.cloned()
.unwrap_or_else(|| fallback_source_label(&chunk_result.chunk.source_id));
combined_results.push(SearchResultForTemplate {
result_type: "text_chunk".to_string(),
score: chunk_result.score,
text_chunk: Some(TextChunkForTemplate {
id: chunk_result.chunk.id,
source_id: chunk_result.chunk.source_id,
source_label,
chunk: chunk_result.chunk.chunk,
score: chunk_result.score,
}),
knowledge_entity: None,
});
}
labels
};
let mut combined_results: Vec<SearchResultForTemplate> =
Vec::with_capacity(search_result.chunks.len() + search_result.entities.len());
// Add chunk results
for chunk_result in search_result.chunks {
let source_label = source_label_map
.get(&chunk_result.chunk.source_id)
.cloned()
.unwrap_or_else(|| fallback_source_label(&chunk_result.chunk.source_id));
combined_results.push(SearchResultForTemplate {
result_type: "text_chunk".to_string(),
score: chunk_result.score,
text_chunk: Some(TextChunkForTemplate {
id: chunk_result.chunk.id,
source_id: chunk_result.chunk.source_id,
source_label,
chunk: chunk_result.chunk.chunk,
score: chunk_result.score,
}),
knowledge_entity: None,
});
}
// Add entity results
for entity_result in search_result.entities {
let source_label = source_label_map
.get(&entity_result.entity.source_id)
.cloned()
.unwrap_or_else(|| fallback_source_label(&entity_result.entity.source_id));
combined_results.push(SearchResultForTemplate {
result_type: "knowledge_entity".to_string(),
score: entity_result.score,
text_chunk: None,
knowledge_entity: Some(KnowledgeEntityForTemplate {
id: entity_result.entity.id,
name: entity_result.entity.name,
description: entity_result.entity.description,
entity_type: format!("{:?}", entity_result.entity.entity_type),
source_id: entity_result.entity.source_id,
source_label,
// Add entity results
for entity_result in search_result.entities {
let source_label = source_label_map
.get(&entity_result.entity.source_id)
.cloned()
.unwrap_or_else(|| fallback_source_label(&entity_result.entity.source_id));
combined_results.push(SearchResultForTemplate {
result_type: "knowledge_entity".to_string(),
score: entity_result.score,
}),
});
text_chunk: None,
knowledge_entity: Some(KnowledgeEntityForTemplate {
id: entity_result.entity.id,
name: entity_result.entity.name,
description: entity_result.entity.description,
entity_type: format!("{:?}", entity_result.entity.entity_type),
source_id: entity_result.entity.source_id,
source_label,
score: entity_result.score,
}),
});
}
// Sort by score descending
combined_results.sort_by(|a, b| b.score.total_cmp(&a.score));
// Limit results
const TOTAL_LIMIT: usize = 10;
combined_results.truncate(TOTAL_LIMIT);
(combined_results, trimmed_query.to_string())
}
// Sort by score descending
combined_results.sort_by(|a, b| b.score.total_cmp(&a.score));
// Limit results
const TOTAL_LIMIT: usize = 10;
combined_results.truncate(TOTAL_LIMIT);
(combined_results, trimmed_query.to_string())
}
} else {
(Vec::<SearchResultForTemplate>::new(), String::new())
};
} else {
(Vec::<SearchResultForTemplate>::new(), String::new())
};
Ok(TemplateResponse::new_template(
"search/base.html",

View File

@@ -8,31 +8,44 @@
<section class="nb-panel p-4 sm:p-5 flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between">
<div>
<h1 class="text-xl font-extrabold tracking-tight">Admin Controls</h1>
<p class="text-sm opacity-70 max-w-2xl">
Stay on top of analytics and manage AI integrations without waiting on long-running model calls.
</p>
</div>
<div class="text-xs opacity-60 sm:text-right">
Signed in as <span class="font-medium">{{ user.email }}</span>
</div>
</section>
<nav class="nb-panel p-2 flex flex-wrap gap-2 text-sm" hx-boost="true" hx-target="#admin-shell"
hx-select="#admin-shell" hx-swap="outerHTML" hx-push-url="true">
<a href="/admin?section=overview"
class="nb-btn btn-sm px-4 {% if current_section == 'overview' %}nb-cta{% else %}btn-ghost{% endif %}">
<nav
class="nb-panel p-2 flex flex-wrap gap-2 text-sm"
hx-boost="true"
hx-target="#admin-shell"
hx-select="#admin-shell"
hx-swap="outerHTML"
hx-push-url="true"
>
<a
href="/admin?section=overview"
class="nb-btn btn-sm px-4 {% if current_section == 'overview' %}nb-cta{% else %}btn-ghost{% endif %}"
>
Overview
</a>
<a href="/admin?section=models"
class="nb-btn btn-sm px-4 {% if current_section == 'models' %}nb-cta{% else %}btn-ghost{% endif %}">
<a
href="/admin?section=models"
class="nb-btn btn-sm px-4 {% if current_section == 'models' %}nb-cta{% else %}btn-ghost{% endif %}"
>
Models
</a>
</nav>
<div id="admin-content" class="flex flex-col gap-4">
{% if current_section == 'models' %}
{% include 'admin/sections/models.html' %}
{% include 'admin/sections/models.html' %}
{% else %}
{% include 'admin/sections/overview.html' %}
{% include 'admin/sections/overview.html' %}
{% endif %}
</div>
</div>
</div>
{% endblock %}
{% endblock %}

View File

@@ -1,7 +1,5 @@
{% extends "modal_base.html" %}
{% block modal_class %}max-w-3xl{% endblock %}
{% block form_attributes %}
hx-patch="/update-image-prompt"
hx-target="#system_prompt_section"

View File

@@ -1,7 +1,5 @@
{% extends "modal_base.html" %}
{% block modal_class %}max-w-3xl{% endblock %}
{% block form_attributes %}
hx-patch="/update-ingestion-prompt"
hx-target="#system_prompt_section"

View File

@@ -1,7 +1,5 @@
{% extends "modal_base.html" %}
{% block modal_class %}max-w-3xl{% endblock %}
{% block form_attributes %}
hx-patch="/update-query-prompt"
hx-target="#system_prompt_section"

View File

@@ -2,8 +2,8 @@
{% block body %}
<body class="relative">
<div id="main-content-wrapper" class="drawer lg:drawer-open">
<body class="relative" hx-ext="head-support">
<div class="drawer lg:drawer-open">
<input id="my-drawer" type="checkbox" class="drawer-toggle" />
<!-- Page Content -->
<div class="drawer-content flex flex-col h-screen">
@@ -14,7 +14,6 @@
{% block main %}{% endblock %}
<div class="p32 min-h-[10px]"></div>
</main>
{% block overlay %}{% endblock %}
</div>
<!-- Sidebar -->
{% if user %}

View File

@@ -2,6 +2,10 @@
{% block title %}Minne - Chat{% endblock %}
{% block head %}
<script src="/assets/htmx-ext-sse.js" defer></script>
{% endblock %}
{% block main %}
<div class="flex grow relative justify-center mt-2 sm:mt-4">
<div class="container">
@@ -13,69 +17,32 @@
</section>
<div id="chat-scroll-container" class="overflow-auto hide-scrollbar">
{% include "chat/history.html" %}
{% include "chat/new_message_form.html" %}
</div>
</div>
</div>
<script>
function doScrollChatToBottom() {
const mainScroll = document.querySelector('main');
if (mainScroll) mainScroll.scrollTop = mainScroll.scrollHeight;
const chatScroll = document.getElementById('chat-scroll-container');
if (chatScroll) chatScroll.scrollTop = chatScroll.scrollHeight;
const chatContainer = document.getElementById('chat_container');
if (chatContainer) chatContainer.scrollTop = chatContainer.scrollHeight;
window.scrollTo(0, document.body.scrollHeight);
}
function scrollChatToBottom() {
if (!window.location.pathname.startsWith('/chat')) return;
requestAnimationFrame(doScrollChatToBottom);
requestAnimationFrame(() => {
const mainScroll = document.querySelector('main');
if (mainScroll) mainScroll.scrollTop = mainScroll.scrollHeight;
const chatScroll = document.getElementById('chat-scroll-container');
if (chatScroll) chatScroll.scrollTop = chatScroll.scrollHeight;
const chatContainer = document.getElementById('chat_container');
if (chatContainer) chatContainer.scrollTop = chatContainer.scrollHeight;
window.scrollTo(0, document.body.scrollHeight);
});
}
window.scrollChatToBottom = scrollChatToBottom;
// Delay initial scroll to avoid interfering with view transition
document.addEventListener('DOMContentLoaded', () => setTimeout(scrollChatToBottom, 350));
document.addEventListener('DOMContentLoaded', scrollChatToBottom);
function handleChatSwap(e) {
if (!window.location.pathname.startsWith('/chat')) return;
// Full page swap: delay for view transition; partial swap: immediate
if (e.detail && e.detail.target && e.detail.target.tagName === 'BODY') {
setTimeout(scrollChatToBottom, 350);
} else {
scrollChatToBottom();
}
}
function cleanupChatListeners(e) {
if (e.detail && e.detail.target && e.detail.target.tagName === 'BODY') {
document.body.removeEventListener('htmx:afterSwap', window._chatEventHandlers.afterSwap);
document.body.removeEventListener('htmx:afterSettle', window._chatEventHandlers.afterSettle);
document.body.removeEventListener('htmx:beforeSwap', window._chatEventHandlers.beforeSwap);
delete window._chatEventHandlers;
window._chatListenersAttached = false;
}
}
window._chatEventHandlers = {
afterSwap: handleChatSwap,
afterSettle: handleChatSwap,
beforeSwap: cleanupChatListeners
};
if (!window._chatListenersAttached) {
document.body.addEventListener('htmx:afterSwap', window._chatEventHandlers.afterSwap);
document.body.addEventListener('htmx:afterSettle', window._chatEventHandlers.afterSettle);
document.body.addEventListener('htmx:beforeSwap', window._chatEventHandlers.beforeSwap);
window._chatListenersAttached = true;
}
document.body.addEventListener('htmx:afterSwap', scrollChatToBottom);
document.body.addEventListener('htmx:afterSettle', scrollChatToBottom);
</script>
{% endblock %}
{% block overlay %}
{% include "chat/new_message_form.html" %}
{% endblock %}

View File

@@ -1,6 +1,6 @@
<div class="fixed bottom-0 left-0 right-0 lg:left-72 z-20">
<div class="mx-auto max-w-3xl px-4 pb-3">
<div class="nb-panel p-2 no-animation">
<div class="nb-panel p-2">
<form hx-post="{% if conversation %} /chat/{{conversation.id}} {% else %} /chat {% endif %}"
hx-target="#chat_container" hx-swap="beforeend" class="relative flex gap-2 items-end" id="chat-form">
<textarea autofocus required name="content" placeholder="Type your message…" rows="3"

View File

@@ -8,15 +8,15 @@
{% if text_contents|length > 0 %}
<div class="nb-masonry w-full">
{% for text_content in text_contents %}
<article class="nb-card cursor-pointer mx-auto mb-4 w-full space-y-3"
<article class="nb-card cursor-pointer mx-auto mb-4 w-full max-w-[92vw] space-y-3 sm:max-w-none"
hx-get="/content/{{ text_content.id }}/read" hx-target="#modal" hx-swap="innerHTML">
{% if text_content.url_info %}
<figure class="nb-evidence-frame -mx-4 -mt-4 mb-3">
<figure class="-mx-4 -mt-4 border-b-2 border-neutral bg-base-200">
<img class="w-full h-auto" src="/file/{{text_content.url_info.image_id}}" alt="website screenshot" />
</figure>
{% endif %}
{% if text_content.file_info.mime_type == "image/png" or text_content.file_info.mime_type == "image/jpeg" %}
<figure class="nb-evidence-frame -mx-4 -mt-4 mb-3">
<figure class="-mx-4 -mt-4 border-b-2 border-neutral bg-base-200">
<img class="w-full h-auto" src="/file/{{text_content.file_info.id}}" alt="{{text_content.file_info.file_name}}" />
</figure>
{% endif %}
@@ -31,10 +31,10 @@
{% endif %}
</h2>
<div class="flex flex-wrap items-center justify-between gap-3">
<p class="nb-data text-xs opacity-60 shrink-0">
<p class="text-xs opacity-60 shrink-0">
{{ text_content.created_at | datetimeformat(format="short", tz=user.timezone) }}
</p>
<span class="nb-badge nb-data">{{ text_content.category }}</span>
<span class="nb-badge">{{ text_content.category }}</span>
<div class="flex gap-2" hx-on:click="event.stopPropagation()">
{% if text_content.url_info %}
<a href="{{text_content.url_info.url}}" target="_blank" rel="noopener noreferrer"

View File

@@ -2,6 +2,10 @@
{% block title %}Minne - Dashboard{% endblock %}
{% block head %}
<script src="/assets/htmx-ext-sse.js" defer></script>
{% endblock %}
{% block main %}
<div class="flex justify-center grow mt-2 sm:mt-4 pb-4 w-full">
<div class="container">

View File

@@ -16,15 +16,11 @@
<!-- Scripts -->
<script src="/assets/htmx.min.js" defer></script>
<script src="/assets/htmx-ext-sse.js" defer></script>
<script src="/assets/theme-toggle.js" defer></script>
<script src="/assets/toast.js" defer></script>
<script src="/assets/htmx-head-ext.js" defer></script>
<script src="/assets/marked.min.js" defer></script>
<script src="/assets/knowledge-graph.js" defer></script>
<script src="/assets/design-polish.js" defer></script>
<!-- Global View Transition -->
<meta name="view-transition" content="same-origin" />
<!-- Icons -->
<link rel="icon" href="/assets/icon/favicon.ico">
@@ -42,7 +38,6 @@
(function wait_for_htmx() {
if (window.htmx) {
htmx.config.globalViewTransitions = true;
htmx.config.selfRequestsOnly = false;
} else {
setTimeout(wait_for_htmx, 50);
}

View File

@@ -1,78 +1,41 @@
{% extends "modal_base.html" %}
{% block modal_class %}max-w-3xl{% endblock %}
{% block form_attributes %}
hx-post="/ingress-form"
enctype="multipart/form-data"
{% endblock %}
{% block modal_content %}
<h3 class="text-xl font-extrabold tracking-tight pr-8">Add New Content</h3>
<div class="flex flex-col">
<!-- Content Source -->
<h3 class="text-xl font-extrabold tracking-tight">Add New Content</h3>
<div class="flex flex-col gap-3">
<label class="w-full">
<div class="nb-label mb-1">Content</div>
<textarea name="content" class="nb-input w-full" rows="4" autofocus
<div class="text-xs uppercase tracking-wide opacity-70 mb-1">Content</div>
<textarea name="content" class="nb-input w-full min-h-28"
placeholder="Paste a URL or type/paste text to ingest…">{{ content }}</textarea>
</label>
<!-- Context (Optional) -->
<label class="w-full mt-6">
<div class="nb-label mb-1 flex justify-between items-center">
<span>Context</span>
<!-- Tufte-style annotation: clean, small caps, structural -->
<span class="text-[10px] tracking-widest uppercase border border-neutral px-1.5 py-px bg-transparent opacity-60">Optional</span>
</div>
<textarea name="context" class="nb-input w-full" rows="2"
placeholder="Guide how this content should be interpreted…">{{ context }}</textarea>
<label class="w-full">
<div class="text-xs uppercase tracking-wide opacity-70 mb-1">Context</div>
<textarea name="context" class="nb-input w-full min-h-24"
placeholder="Optional: add context to guide how the content should be interpreted…">{{ context }}</textarea>
</label>
<!-- Metadata Grid -->
<div class="grid grid-cols-1 lg:grid-cols-2 gap-x-12 gap-y-8 items-start mt-6">
<!-- Category -->
<label class="w-full">
<div class="nb-label mb-1">Category <span class="text-error font-bold" title="Required">*</span></div>
<div class="relative">
<input type="text" name="category" class="nb-input validator w-full pr-8" value="{{ category }}" list="category-list" required placeholder="Select or type..." />
<div class="absolute right-2 top-1/2 -translate-y-1/2 pointer-events-none opacity-50">
{% include "icons/chevron_icon.html" %}
</div>
</div>
<datalist id="category-list">
{% for category in user_categories %}
<option value="{{ category }}" />
{% endfor %}
</datalist>
<div class="validator-hint hidden text-xs opacity-70 mt-1 text-error">Category is required</div>
</label>
<label class="w-full">
<div class="text-xs uppercase tracking-wide opacity-70 mb-1">Category</div>
<input type="text" name="category" class="nb-input validator w-full" value="{{ category }}" list="category-list" required />
<datalist id="category-list">
{% for category in user_categories %}
<option value="{{ category }}" />
{% endfor %}
</datalist>
<div class="validator-hint hidden text-xs opacity-70 mt-1">Category is required</div>
</label>
<!-- Dimensional File Drop Zone -->
<div class="w-full">
<div class="nb-label mb-1">Files</div>
<!-- "Card" style dropzone: solid border, hard shadow, lift on hover -->
<div class="relative w-full h-32 group bg-base-100 border-2 border-neutral shadow-[4px_4px_0_0_#000] hover:translate-x-[-1px] hover:translate-y-[-1px] hover:shadow-[6px_6px_0_0_#000] transition-all duration-150">
<!-- Visual Facade -->
<div class="absolute inset-0 flex flex-col items-center justify-center gap-3 text-sm font-medium text-neutral pointer-events-none">
<div class="p-2 border-2 border-neutral rounded-none bg-base-200 group-hover:bg-base-100 transition-colors">
<span class="w-6 h-6 block">{% include "icons/document_icon.html" %}</span>
</div>
<span id="file-label-text" class="text-center px-4 text-xs uppercase tracking-wide">Drop files or click</span>
</div>
<!-- Actual Input -->
<input type="file" name="files" multiple
class="absolute inset-0 w-full h-full opacity-0 cursor-pointer"
onchange="const count = this.files.length; document.getElementById('file-label-text').innerText = count > 0 ? count + ' FILE' + (count !== 1 ? 'S' : '') + ' SELECTED' : 'DROP FILES OR CLICK';" />
</div>
</div>
</div>
<label class="w-full">
<div class="text-xs uppercase tracking-wide opacity-70 mb-1">Files</div>
<input type="file" name="files" multiple class="file-input w-full rounded-none border-2 border-neutral" />
</label>
</div>
<div id="error-message" class="text-error text-center {% if not error %}hidden{% endif %}">{{ error }}</div>
<script>
(function () {
const form = document.getElementById('modal_form');
@@ -88,9 +51,8 @@ enctype="multipart/form-data"
})();
</script>
{% endblock %}
{% block primary_actions %}
<button type="submit" class="nb-btn nb-cta w-full sm:w-auto">
<button type="submit" class="nb-btn nb-cta">
Add Content
</button>
{% endblock %}

View File

@@ -1,24 +1,15 @@
<dialog id="body_modal" class="modal">
<div
class="modal-box relative rounded-none border-2 border-neutral bg-base-100 shadow-[8px_8px_0_0_#000] p-6 {% block modal_class %}max-w-lg{% endblock %}">
<!-- God Level UX: Explicit Escape Hatch -->
<button type="button"
class="btn btn-sm btn-square btn-ghost absolute right-2 top-2 z-10"
onclick="document.getElementById('body_modal').close()"
aria-label="Close modal">
{% include "icons/x_icon.html" %}
</button>
class="modal-box rounded-none border-2 border-neutral bg-base-100 shadow-[8px_8px_0_0_#000] {% block modal_class %}{% endblock %}">
<form id="modal_form" {% block form_attributes %}{% endblock %}>
<div class="flex flex-col flex-1 gap-5">
<div class="flex flex-col flex-1 gap-4">
{% block modal_content %}{% endblock %}
</div>
<div class="mt-8 pt-2 flex flex-col gap-2 sm:flex-row sm:justify-end sm:items-center">
<!-- Secondary Action: Ghost style to reduce noise -->
<button type="button" class="btn btn-ghost rounded-none w-full sm:w-auto hover:bg-neutral/10" onclick="document.getElementById('body_modal').close()">
Cancel
<div class="u-hairline mt-4 pt-3 flex flex-col gap-2 sm:flex-row sm:justify-end sm:items-center">
<!-- Close button (always visible) -->
<button type="button" class="nb-btn w-full sm:w-auto" onclick="document.getElementById('body_modal').close()">
Close
</button>
<!-- Primary actions block -->

View File

@@ -1,4 +1,4 @@
<nav class="sticky top-0 z-10 nb-panel nb-panel-canvas border-t-0" style="view-transition-name: navbar; contain: layout;">
<nav class="sticky top-0 z-10 nb-panel nb-panel-canvas border-t-0">
<div class="container mx-auto navbar">
<div class="mr-2 flex-1">
{% include "searchbar.html" %}

View File

@@ -6,7 +6,7 @@
{% block modal_content %}
<h3 class="text-xl font-extrabold tracking-tight">
<div class="flex items-center gap-2 {% if is_editing_title %}hidden{% endif %}" id="title-container">
<div class="flex items-center gap-2" id="title-container">
<span class="font-semibold text-lg flex-1 truncate" id="title-display">{{ scratchpad.title }}</span>
<button type="button" onclick="editTitle()" class="nb-btn nb-btn-sm btn-ghost">
{% include "icons/edit_icon.html" %} Edit title
@@ -15,9 +15,9 @@
<!-- Hidden title form -->
<form id="title-form" hx-patch="/scratchpad/{{ scratchpad.id }}/title" hx-target="#body_modal" hx-swap="outerHTML"
class="{% if not is_editing_title %}hidden{% endif %} flex items-center gap-2">
class="hidden flex items-center gap-2">
<input type="text" name="title" value="{{ scratchpad.title }}"
class="nb-input nb-input-sm font-semibold text-lg flex-1" id="title-input" {% if is_editing_title %}autofocus{% endif %}>
class="nb-input nb-input-sm font-semibold text-lg flex-1" id="title-input">
<button type="submit" class="nb-btn nb-btn-sm">{% include "icons/check_icon.html" %}</button>
<button type="button" onclick="cancelEditTitle()" class="nb-btn nb-btn-sm btn-ghost">{% include "icons/x_icon.html" %}</button>
</form>

View File

@@ -0,0 +1,41 @@
<!-- Theme switch script -->
<script>
/**
 * Synchronise the page theme with the `.theme-controller` checkbox.
 *
 * Reads the saved theme from localStorage (falling back to the system
 * `prefers-color-scheme` setting), applies it to the `data-theme` attribute
 * on <html>, and reflects it in the checkbox state. Toggling the checkbox
 * updates both the attribute and localStorage.
 *
 * This function is invoked repeatedly (on DOMContentLoaded and after HTMX
 * swaps), so it must be safe to call many times against the same element.
 */
const initializeTheme = () => {
    console.log("Initializing theme toggle...");
    const themeToggle = document.querySelector('.theme-controller');
    if (!themeToggle) {
        console.log("Theme toggle not found.");
        return;
    }
    // Detect system preference
    const prefersDark = window.matchMedia('(prefers-color-scheme: dark)').matches;
    // Initialize theme from local storage or system preference
    const savedTheme = localStorage.getItem('theme');
    const initialTheme = savedTheme ? savedTheme : (prefersDark ? 'dark' : 'light');
    document.documentElement.setAttribute('data-theme', initialTheme);
    themeToggle.checked = initialTheme === 'dark';
    // Attach the change listener only once per element. The handler is an
    // anonymous closure, so addEventListener cannot de-duplicate it; without
    // this guard every re-initialisation would stack another listener on a
    // toggle that survived the swap. A freshly swapped-in toggle has no
    // marker and is therefore bound normally.
    if (themeToggle.dataset.themeBound !== 'true') {
        themeToggle.dataset.themeBound = 'true';
        // Update theme and local storage on toggle
        themeToggle.addEventListener('change', () => {
            const theme = themeToggle.checked ? 'dark' : 'light';
            console.log("Theme switched to:", theme);
            document.documentElement.setAttribute('data-theme', theme);
            localStorage.setItem('theme', theme);
        });
    }
    console.log("Theme toggle initialized.");
};
// Run the initialization once the initial document has been parsed, so the
// `.theme-controller` element can be found by initializeTheme.
document.addEventListener('DOMContentLoaded', () => {
console.log("DOM fully loaded. Initializing theme toggle...");
initializeTheme();
});
// Reinitialize theme toggle after HTMX swaps, since swapped-in markup may
// contain a new toggle element that has not been wired up yet.
// NOTE(review): both events appear to fire for a single swap, which would run
// initializeTheme twice per swap — confirm against the htmx event docs and
// consider keeping only one of the two registrations.
document.addEventListener('htmx:afterSwap', initializeTheme);
document.addEventListener('htmx:afterSettle', initializeTheme);
</script>

View File

@@ -14,7 +14,7 @@
{% endif %}
{% endmacro %}
<div class="drawer-side z-20" style="view-transition-name: sidebar; contain: layout;">
<div class="drawer-side z-20">
<label for="my-drawer" aria-label="close sidebar" class="drawer-overlay"></label>
<ul class="menu p-0 w-72 h-full nb-canvas text-base-content flex flex-col border-r-2 border-neutral">
@@ -47,7 +47,7 @@
</div>
<!-- === MIDDLE SCROLLABLE SECTION === -->
<span class="px-4 py-2 nb-label">Recent Chats</span>
<span class="px-4 py-2 font-semibold tracking-wide">Recent Chats</span>
<div class="flex-1 overflow-y-auto space-y-1 custom-scrollbar">
{% if conversation_archive is defined and conversation_archive %}
{% for conversation in conversation_archive %}

View File

@@ -1,4 +1,7 @@
#![allow(clippy::missing_docs_in_private_items, clippy::result_large_err)]
#![allow(
clippy::missing_docs_in_private_items,
clippy::result_large_err
)]
pub mod pipeline;
pub mod utils;

View File

@@ -182,7 +182,9 @@ impl IngestionPipeline {
.saturating_sub(1)
.min(tuning.retry_backoff_cap_exponent);
let multiplier = 2_u64.pow(capped_attempt);
let delay = tuning.retry_base_delay_secs.saturating_mul(multiplier);
let delay = tuning
.retry_base_delay_secs
.saturating_mul(multiplier);
Duration::from_secs(delay.min(tuning.retry_max_delay_secs))
}

View File

@@ -184,9 +184,8 @@ impl PipelineServices for DefaultPipelineServices {
None => None,
};
let config = retrieval_pipeline::RetrievalConfig::for_search(
retrieval_pipeline::SearchTarget::EntitiesOnly,
);
let config =
retrieval_pipeline::RetrievalConfig::for_search(retrieval_pipeline::SearchTarget::EntitiesOnly);
match retrieval_pipeline::retrieve_entities(
&self.db,
&self.openai_client,

View File

@@ -1,6 +1,6 @@
[package]
name = "main"
version = "1.0.0"
version = "0.2.7"
edition = "2021"
repository = "https://github.com/perstarkse/minne"
license = "AGPL-3.0-or-later"

View File

@@ -2,9 +2,7 @@ use api_router::{api_routes_v1, api_state::ApiState};
use axum::{extract::FromRef, Router};
use common::{
storage::{
db::SurrealDbClient,
indexes::ensure_runtime_indexes,
store::StorageManager,
db::SurrealDbClient, indexes::ensure_runtime_indexes, store::StorageManager,
types::{
knowledge_entity::KnowledgeEntity, system_settings::SystemSettings,
text_chunk::TextChunk,
@@ -68,6 +66,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let (settings, dimensions_changed) =
SystemSettings::sync_from_embedding_provider(&db, &embedding_provider).await?;
// Now ensure runtime indexes with the correct (synced) dimensions
ensure_runtime_indexes(&db, settings.embedding_dimensions as usize).await?;
// If dimensions changed, re-embed existing data to keep queries working.
if dimensions_changed {
warn!(
@@ -77,19 +78,22 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Re-embed text chunks
info!("Re-embedding TextChunks");
if let Err(e) =
TextChunk::update_all_embeddings_with_provider(&db, &embedding_provider).await
if let Err(e) = TextChunk::update_all_embeddings_with_provider(
&db,
&embedding_provider,
)
.await
{
error!(
"Failed to re-embed TextChunks: {}. Search results may be stale.",
e
);
error!("Failed to re-embed TextChunks: {}. Search results may be stale.", e);
}
// Re-embed knowledge entities
info!("Re-embedding KnowledgeEntities");
if let Err(e) =
KnowledgeEntity::update_all_embeddings_with_provider(&db, &embedding_provider).await
if let Err(e) = KnowledgeEntity::update_all_embeddings_with_provider(
&db,
&embedding_provider,
)
.await
{
error!(
"Failed to re-embed KnowledgeEntities: {}. Search results may be stale.",
@@ -100,9 +104,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
info!("Re-embedding complete.");
}
// Now ensure runtime indexes with the correct (synced) dimensions
ensure_runtime_indexes(&db, settings.embedding_dimensions as usize).await?;
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
// Create global storage manager

View File

@@ -51,8 +51,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let storage = StorageManager::new(&config).await?;
// Create embedding provider based on config
let embedding_provider =
Arc::new(EmbeddingProvider::from_config(&config, Some(openai_client.clone())).await?);
let embedding_provider = Arc::new(
EmbeddingProvider::from_config(&config, Some(openai_client.clone())).await?,
);
info!(
embedding_backend = ?config.embedding_backend,
embedding_dimension = embedding_provider.dimension(),

View File

@@ -1,8 +1,7 @@
use std::sync::Arc;
use common::{
storage::db::SurrealDbClient,
storage::store::StorageManager,
storage::db::SurrealDbClient, storage::store::StorageManager,
utils::{config::get_config, embedding::EmbeddingProvider},
};
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
@@ -41,8 +40,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
// Create embedding provider based on config
let embedding_provider =
Arc::new(EmbeddingProvider::from_config(&config, Some(openai_client.clone())).await?);
let embedding_provider = Arc::new(
EmbeddingProvider::from_config(&config, Some(openai_client.clone())).await?,
);
info!(
embedding_backend = ?config.embedding_backend,
"Embedding provider initialized for worker"
@@ -51,14 +51,17 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Create global storage manager
let storage = StorageManager::new(&config).await?;
let ingestion_pipeline = Arc::new(IngestionPipeline::new(
db.clone(),
openai_client.clone(),
config,
reranker_pool,
storage,
embedding_provider,
)?);
let ingestion_pipeline = Arc::new(
IngestionPipeline::new(
db.clone(),
openai_client.clone(),
config,
reranker_pool,
storage,
embedding_provider,
)
?,
);
run_worker_loop(db, ingestion_pipeline).await
}

View File

@@ -118,6 +118,7 @@ mod tests {
use common::storage::types::knowledge_relationship::KnowledgeRelationship;
use uuid::Uuid;
#[tokio::test]
async fn test_find_entities_by_relationship_by_id() {
// Setup in-memory database for testing

View File

@@ -301,14 +301,9 @@ mod tests {
.await
.expect("Search strategy retrieval failed");
assert!(
matches!(results, StrategyOutput::Search(_)),
"expected Search output, got {:?}",
results
);
let search_result = match results {
StrategyOutput::Search(sr) => sr,
_ => unreachable!(),
other => panic!("expected Search output, got {:?}", other),
};
// Should return chunks (entities may be empty if none stored)
@@ -317,10 +312,7 @@ mod tests {
"Search strategy should return chunks"
);
assert!(
search_result
.chunks
.iter()
.any(|c| c.chunk.chunk.contains("Tokio")),
search_result.chunks.iter().any(|c| c.chunk.chunk.contains("Tokio")),
"Search results should contain relevant chunks"
);
}

View File

@@ -17,9 +17,7 @@ use std::time::{Duration, Instant};
use tracing::info;
use stages::PipelineContext;
use strategies::{
DefaultStrategyDriver, IngestionDriver, RelationshipSuggestionDriver, SearchStrategyDriver,
};
use strategies::{DefaultStrategyDriver, IngestionDriver, RelationshipSuggestionDriver, SearchStrategyDriver};
// Export StrategyOutput publicly from this module
// (it's defined in lib.rs but we re-export it here)

View File

@@ -10,10 +10,14 @@ use common::{
};
use fastembed::RerankResult;
use futures::{stream::FuturesUnordered, StreamExt};
use std::{cmp::Ordering, collections::HashMap};
use std::{
cmp::Ordering,
collections::HashMap,
};
use tracing::{debug, instrument, warn};
use crate::{
graph::find_entities_by_relationship_by_id,
reranking::RerankerLease,
scoring::{
@@ -440,6 +444,7 @@ pub async fn expand_graph(ctx: &mut PipelineContext<'_>) -> Result<(), AppError>
Ok(())
}
#[instrument(level = "trace", skip_all)]
pub async fn rerank(ctx: &mut PipelineContext<'_>) -> Result<(), AppError> {
let mut applied = false;
@@ -567,6 +572,28 @@ pub async fn collect_vector_chunks(ctx: &mut PipelineContext<'_>) -> Result<(),
// .filter(|c| c.scores.vector.is_some() && c.scores.fts.is_some())
// .count();
// If we have very low overlap (few chunks with both signals), drop FTS-only chunks.
// These are often noisy on keyword-heavy datasets and dilute strong vector hits.
// Keep vector-only and “golden” (vector+FTS) chunks.
let fts_only_count = vector_chunks
.iter()
.filter(|c| c.scores.vector.is_none())
.count();
let both_count = vector_chunks
.iter()
.filter(|c| c.scores.vector.is_some() && c.scores.fts.is_some())
.count();
if fts_only_count > 0 && both_count < 3 {
let before_filter = vector_chunks.len();
vector_chunks.retain(|c| c.scores.vector.is_some());
let after_filter = vector_chunks.len();
debug!(
fts_only_filtered = before_filter - after_filter,
both_signals_preserved = both_count,
"Filtered out FTS-only chunks due to low overlap, preserved golden chunks"
);
}
debug!(
top_fused_scores = ?vector_chunks.iter().take(5).map(|c| c.fused).collect::<Vec<_>>(),
"Fused scores after RRF ordering"

View File

@@ -8,6 +8,8 @@ use super::{
use crate::{RetrievedChunk, RetrievedEntity};
use common::error::AppError;
pub struct DefaultStrategyDriver;
impl DefaultStrategyDriver {
@@ -87,8 +89,8 @@ impl StrategyDriver for IngestionDriver {
}
}
use super::config::SearchTarget;
use crate::SearchResult;
use super::config::SearchTarget;
/// Search strategy driver that retrieves both chunks and entities
pub struct SearchStrategyDriver {