release: 1.0.5

This commit is contained in:
Per Stark
2026-06-24 22:02:31 +02:00
parent ba3fd6ed46
commit d273390de8
118 changed files with 989 additions and 690 deletions
+1 -1
View File
@@ -1,7 +1,7 @@
[package]
name = "common"
version = "0.1.0"
edition = "2021"
edition = "2024"
license = "AGPL-3.0-or-later"
[lints]
+4 -4
View File
@@ -3,14 +3,14 @@ use crate::error::AppError;
use axum_session::{SessionConfig, SessionError, SessionStore};
use axum_session_surreal::SessionSurrealPool;
use futures::Stream;
use include_dir::{include_dir, Dir};
use serde::de::DeserializeOwned;
use include_dir::{Dir, include_dir};
use serde::Serialize;
use serde::de::DeserializeOwned;
use std::{ops::Deref, sync::Arc};
use surrealdb::{
engine::any::{connect, Any},
opt::auth::{Namespace, Root},
Error, Notification, Surreal,
engine::any::{Any, connect},
opt::auth::{Namespace, Root},
};
use surrealdb_migrations::MigrationRunner;
use tracing::debug;
+82 -55
View File
@@ -2,14 +2,14 @@ use std::io::ErrorKind;
use std::path::{Component, Path, PathBuf};
use std::sync::Arc;
use anyhow::{anyhow, Context, Result as AnyResult};
use anyhow::{Context, Result as AnyResult, anyhow};
use bytes::Bytes;
use futures::stream::BoxStream;
use futures::{StreamExt, TryStreamExt};
use object_store::aws::AmazonS3Builder;
use object_store::local::LocalFileSystem;
use object_store::memory::InMemory;
use object_store::{path::Path as ObjPath, ObjectStore};
use object_store::{ObjectStore, path::Path as ObjPath};
use crate::utils::config::{AppConfig, StorageKind};
@@ -461,9 +461,12 @@ pub mod testing {
pub async fn new_s3() -> object_store::Result<Self> {
// Ensure credentials are set for MinIO
// We set these env vars for the process, which AmazonS3Builder will pick up
std::env::set_var("AWS_ACCESS_KEY_ID", "minioadmin");
std::env::set_var("AWS_SECRET_ACCESS_KEY", "minioadmin");
std::env::set_var("AWS_REGION", "us-east-1");
// SAFETY: test setup runs before concurrent S3 client use in this process.
unsafe {
std::env::set_var("AWS_ACCESS_KEY_ID", "minioadmin");
std::env::set_var("AWS_SECRET_ACCESS_KEY", "minioadmin");
std::env::set_var("AWS_REGION", "us-east-1");
}
let cfg = test_config_s3();
let storage = StorageManager::new(&cfg).await?;
@@ -543,10 +546,10 @@ pub mod testing {
impl Drop for TestStorageManager {
fn drop(&mut self) {
// Clean up temporary directories for local storage
if let Some((_, path)) = &self.temp_dir {
if path.exists() {
let _ = std::fs::remove_dir_all(path);
}
if let Some((_, path)) = &self.temp_dir
&& path.exists()
{
let _ = std::fs::remove_dir_all(path);
}
}
}
@@ -690,20 +693,24 @@ mod tests {
assert_eq!(retrieved.as_ref(), data);
// Test exists
assert!(storage
.exists(location)
.await
.with_context(|| "exists check".to_string())?);
assert!(
storage
.exists(location)
.await
.with_context(|| "exists check".to_string())?
);
// Test delete
storage
.delete_prefix("test/data/")
.await
.with_context(|| "delete".to_string())?;
assert!(!storage
.exists(location)
.await
.with_context(|| "exists check after delete".to_string())?);
assert!(
!storage
.exists(location)
.await
.with_context(|| "exists check after delete".to_string())?
);
Ok(())
}
@@ -741,20 +748,24 @@ mod tests {
.with_context(|| "object directory exists after write".to_string())?;
// Test exists
assert!(storage
.exists(location)
.await
.with_context(|| "exists check".to_string())?);
assert!(
storage
.exists(location)
.await
.with_context(|| "exists check".to_string())?
);
// Test delete
storage
.delete_prefix("test/data/")
.await
.with_context(|| "delete".to_string())?;
assert!(!storage
.exists(location)
.await
.with_context(|| "exists check after delete".to_string())?);
assert!(
!storage
.exists(location)
.await
.with_context(|| "exists check after delete".to_string())?
);
assert!(
tokio::fs::metadata(&object_dir).await.is_err(),
"object directory should be removed"
@@ -846,12 +857,16 @@ mod tests {
.await
.with_context(|| "list dir1".to_string())?;
assert_eq!(dir1_files.len(), 2);
assert!(dir1_files
.iter()
.any(|meta| meta.location.as_ref().contains("file1.txt")));
assert!(dir1_files
.iter()
.any(|meta| meta.location.as_ref().contains("file2.txt")));
assert!(
dir1_files
.iter()
.any(|meta| meta.location.as_ref().contains("file1.txt"))
);
assert!(
dir1_files
.iter()
.any(|meta| meta.location.as_ref().contains("file2.txt"))
);
// Test listing non-existent prefix
let empty_files = storage
@@ -918,10 +933,12 @@ mod tests {
.with_context(|| "get".to_string())?;
assert_eq!(retrieved.as_ref(), data);
assert!(storage
.exists(location)
.await
.with_context(|| "exists".to_string())?);
assert!(
storage
.exists(location)
.await
.with_context(|| "exists".to_string())?
);
assert_eq!(*storage.backend_kind(), StorageKind::Memory);
Ok(())
@@ -975,10 +992,12 @@ mod tests {
assert_eq!(retrieved.as_ref(), data);
// Test existence check
assert!(test_storage
.exists(location)
.await
.with_context(|| "exists".to_string())?);
assert!(
test_storage
.exists(location)
.await
.with_context(|| "exists".to_string())?
);
// Test list
let files = test_storage
@@ -992,10 +1011,12 @@ mod tests {
.delete_prefix("test/storage/")
.await
.with_context(|| "delete".to_string())?;
assert!(!test_storage
.exists(location)
.await
.with_context(|| "exists after delete".to_string())?);
assert!(
!test_storage
.exists(location)
.await
.with_context(|| "exists after delete".to_string())?
);
Ok(())
}
@@ -1019,10 +1040,12 @@ mod tests {
.with_context(|| "get".to_string())?;
assert_eq!(retrieved.as_ref(), data);
assert!(test_storage
.exists(location)
.await
.with_context(|| "exists".to_string())?);
assert!(
test_storage
.exists(location)
.await
.with_context(|| "exists".to_string())?
);
Ok(())
}
@@ -1119,20 +1142,24 @@ mod tests {
assert_eq!(retrieved.as_ref(), data);
// Test exists
assert!(storage
.exists(&location)
.await
.with_context(|| "exists".to_string())?);
assert!(
storage
.exists(&location)
.await
.with_context(|| "exists".to_string())?
);
// Test delete
storage
.delete_prefix(&format!("{prefix}/"))
.await
.with_context(|| "delete".to_string())?;
assert!(!storage
.exists(&location)
.await
.with_context(|| "exists after delete".to_string())?);
assert!(
!storage
.exists(&location)
.await
.with_context(|| "exists after delete".to_string())?
);
Ok(())
}
+1 -1
View File
@@ -1,4 +1,4 @@
use crate::storage::types::{user::User, StoredObject};
use crate::storage::types::{StoredObject, user::User};
use crate::utils::serde_helpers::deserialize_flexible_id;
use serde::{Deserialize, Serialize};
+4 -3
View File
@@ -315,9 +315,10 @@ impl IngestionTask {
"#;
debug_assert!(lifecycle::pending().reserve().is_ok());
debug_assert!(lifecycle::pending().reserve().is_ok_and(|m| m
.start_processing()
.is_ok_and(|m| m.fail().is_ok_and(|m| m.reserve().is_ok()))));
debug_assert!(lifecycle::pending().reserve().is_ok_and(|m| {
m.start_processing()
.is_ok_and(|m| m.fail().is_ok_and(|m| m.reserve().is_ok()))
}));
let mut result = db
.client
+15 -9
View File
@@ -399,7 +399,9 @@ impl KnowledgeEntity {
if embedding.len() != new_dimensions {
let err_msg = format!(
"CRITICAL: Generated embedding for entity {} has incorrect dimension ({}). Expected {}. Aborting.",
entity.id, embedding.len(), new_dimensions
entity.id,
embedding.len(),
new_dimensions
);
error!("{err_msg}");
return Err(AppError::internal(err_msg));
@@ -864,14 +866,18 @@ mod tests {
let rid_e1 = surrealdb::RecordId::from_table_key(KnowledgeEntity::table_name(), &e1.id);
let rid_e2 = surrealdb::RecordId::from_table_key(KnowledgeEntity::table_name(), &e2.id);
assert!(KnowledgeEntityEmbedding::get_by_record_id(&db, &rid_e1)
.await
.with_context(|| "get embedding e1".to_string())?
.is_some());
assert!(KnowledgeEntityEmbedding::get_by_record_id(&db, &rid_e2)
.await
.with_context(|| "get embedding e2".to_string())?
.is_some());
assert!(
KnowledgeEntityEmbedding::get_by_record_id(&db, &rid_e1)
.await
.with_context(|| "get embedding e1".to_string())?
.is_some()
);
assert!(
KnowledgeEntityEmbedding::get_by_record_id(&db, &rid_e2)
.await
.with_context(|| "get embedding e2".to_string())?
.is_some()
);
let results = KnowledgeEntity::vector_search(2, &[0.0, 1.0, 0.0], &db, &user_id)
.await
@@ -287,10 +287,12 @@ mod tests {
.with_context(|| "get entity2 embedding after delete".to_string())?
.is_none()
);
assert!(KnowledgeEntityEmbedding::get_by_record_id(&db, &other_rid)
.await
.with_context(|| "get other embedding after delete".to_string())?
.is_some());
assert!(
KnowledgeEntityEmbedding::get_by_record_id(&db, &other_rid)
.await
.with_context(|| "get other embedding after delete".to_string())?
.is_some()
);
Ok(())
}
@@ -575,12 +575,16 @@ mod tests {
KnowledgeRelationship::delete_relationships_by_source_id(shared_source, user_a, &db)
.await?;
assert!(get_relationship_by_id(&owner_relationship_id, &db)
.await
.is_none());
assert!(get_relationship_by_id(&other_relationship_id, &db)
.await
.is_some());
assert!(
get_relationship_by_id(&owner_relationship_id, &db)
.await
.is_none()
);
assert!(
get_relationship_by_id(&other_relationship_id, &db)
.await
.is_some()
);
Ok(())
}
+12 -12
View File
@@ -223,16 +223,16 @@ impl SystemSettings {
needs_update = true;
}
if let Some(model) = provider_model {
if settings.embedding_model != model {
tracing::info!(
old_model = %settings.embedding_model,
new_model = %model,
"Embedding model changed, updating SystemSettings"
);
settings.embedding_model = model;
needs_update = true;
}
if let Some(model) = provider_model
&& settings.embedding_model != model
{
tracing::info!(
old_model = %settings.embedding_model,
new_model = %model,
"Embedding model changed, updating SystemSettings"
);
settings.embedding_model = model;
needs_update = true;
}
if needs_update {
@@ -719,8 +719,8 @@ mod tests {
}
#[tokio::test]
async fn test_should_change_embedding_length_on_indexes_when_switching_length(
) -> anyhow::Result<()> {
async fn test_should_change_embedding_length_on_indexes_when_switching_length()
-> anyhow::Result<()> {
use crate::utils::embedding::EmbeddingProvider;
let db = setup_test_db().await?;
+4 -2
View File
@@ -4,7 +4,7 @@ use std::fmt::Write;
use crate::storage::indexes::hnsw_index_overwrite_sql;
use crate::storage::types::{
text_chunk_embedding::TextChunkEmbedding, EmbeddingRecord, HasEmbedding,
EmbeddingRecord, HasEmbedding, text_chunk_embedding::TextChunkEmbedding,
};
use crate::utils::embedding::RE_EMBED_BATCH_SIZE;
use crate::{error::AppError, storage::db::SurrealDbClient, stored_object};
@@ -216,7 +216,9 @@ impl TextChunk {
if embedding.len() != new_dimensions {
let err_msg = format!(
"CRITICAL: Generated embedding for chunk {} has incorrect dimension ({}). Expected {}. Aborting.",
chunk.id, embedding.len(), new_dimensions
chunk.id,
embedding.len(),
new_dimensions
);
error!("{err_msg}");
return Err(AppError::internal(err_msg));
@@ -235,35 +235,47 @@ mod tests {
.with_context(|| format!("store embedding for {key}"))?;
}
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk1_rid)
.await
.with_context(|| "get chunk1".to_string())?
.is_some());
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk2_rid)
.await
.with_context(|| "get chunk2".to_string())?
.is_some());
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk_other_rid)
.await
.with_context(|| "get chunk_other".to_string())?
.is_some());
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk1_rid)
.await
.with_context(|| "get chunk1".to_string())?
.is_some()
);
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk2_rid)
.await
.with_context(|| "get chunk2".to_string())?
.is_some()
);
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk_other_rid)
.await
.with_context(|| "get chunk_other".to_string())?
.is_some()
);
TextChunkEmbedding::delete_by_source_id(source_id, &db)
.await
.with_context(|| "Failed to delete by source_id".to_string())?;
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk1_rid)
.await
.with_context(|| "check chunk1".to_string())?
.is_none());
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk2_rid)
.await
.with_context(|| "check chunk2".to_string())?
.is_none());
assert!(TextChunkEmbedding::get_by_record_id(&db, &chunk_other_rid)
.await
.with_context(|| "check chunk_other".to_string())?
.is_some());
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk1_rid)
.await
.with_context(|| "check chunk1".to_string())?
.is_none()
);
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk2_rid)
.await
.with_context(|| "check chunk2".to_string())?
.is_none()
);
assert!(
TextChunkEmbedding::get_by_record_id(&db, &chunk_other_rid)
.await
.with_context(|| "check chunk_other".to_string())?
.is_some()
);
Ok(())
}
+3 -3
View File
@@ -1,8 +1,8 @@
use std::collections::{HashMap, HashSet};
use std::str::FromStr;
use surrealdb::opt::PatchOp;
use surrealdb::RecordId;
use surrealdb::opt::PatchOp;
use uuid::Uuid;
use crate::{error::AppError, storage::db::SurrealDbClient, stored_object};
@@ -682,8 +682,8 @@ mod tests {
}
#[tokio::test]
async fn clear_ingested_children_removes_chunks_entities_and_relationships(
) -> anyhow::Result<()> {
async fn clear_ingested_children_removes_chunks_entities_and_relationships()
-> anyhow::Result<()> {
let db = setup_test_db().await?;
let user_id = "clear-user";
let source_id = Uuid::new_v4().to_string();
+2 -2
View File
@@ -3,7 +3,7 @@ use anyhow::anyhow;
use async_trait::async_trait;
use axum_session_auth::Authentication;
use chrono_tz::Tz;
use surrealdb::{engine::any::Any, Surreal};
use surrealdb::{Surreal, engine::any::Any};
use uuid::Uuid;
use super::text_chunk::TextChunk;
@@ -729,7 +729,7 @@ mod tests {
use super::*;
use crate::storage::types::ingestion_payload::IngestionPayload;
use crate::storage::types::ingestion_task::{IngestionTask, TaskState, MAX_ATTEMPTS};
use crate::storage::types::ingestion_task::{IngestionTask, MAX_ATTEMPTS, TaskState};
use std::collections::HashSet;
use crate::test_utils::setup_test_db;
+2 -2
View File
@@ -8,8 +8,8 @@ use crate::storage::{
db::SurrealDbClient,
indexes::{ensure_runtime, rebuild},
types::{
knowledge_entity_embedding::KnowledgeEntityEmbedding, system_settings::SystemSettings,
text_chunk_embedding::TextChunkEmbedding, EmbeddingRecord,
EmbeddingRecord, knowledge_entity_embedding::KnowledgeEntityEmbedding,
system_settings::SystemSettings, text_chunk_embedding::TextChunkEmbedding,
},
};
+8 -2
View File
@@ -198,7 +198,10 @@ pub fn ensure_ort_path() {
exe.join("lib").join("onnxruntime.dll"),
] {
if p.exists() {
env::set_var("ORT_DYLIB_PATH", p);
// SAFETY: `Once` ensures this runs on a single thread during startup.
unsafe {
env::set_var("ORT_DYLIB_PATH", p);
}
return;
}
}
@@ -210,7 +213,10 @@ pub fn ensure_ort_path() {
};
let p = exe.join("lib").join(name);
if p.exists() {
env::set_var("ORT_DYLIB_PATH", p);
// SAFETY: `Once` ensures this runs on a single thread during startup.
unsafe {
env::set_var("ORT_DYLIB_PATH", p);
}
}
});
}
+3 -4
View File
@@ -9,7 +9,7 @@ use std::{
use serde::Serialize;
use tracing::warn;
use async_openai::{types::embeddings::CreateEmbeddingRequestArgs, Client};
use async_openai::{Client, types::embeddings::CreateEmbeddingRequestArgs};
use fastembed::{EmbeddingModel, ModelTrait, TextEmbedding, TextInitOptions};
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
@@ -588,9 +588,8 @@ mod tests {
#![allow(clippy::expect_used)]
use super::{
align_fastembed_system_settings, fastembed_model_dimension,
list_fastembed_embedding_models, resolve_fastembed_model_code, EmbeddingError,
DEFAULT_FASTEMBED_MODEL_CODE,
DEFAULT_FASTEMBED_MODEL_CODE, EmbeddingError, align_fastembed_system_settings,
fastembed_model_dimension, list_fastembed_embedding_models, resolve_fastembed_model_code,
};
use crate::storage::types::system_settings::SystemSettings;
use crate::utils::config::{AppConfig, EmbeddingBackend, ParseEmbeddingBackendError};
+7 -7
View File
@@ -47,13 +47,13 @@ pub fn validate_ingest_input(
)));
}
if let Some(content) = content {
if content.len() > config.ingest_max_content_bytes {
return Err(IngestValidationError::PayloadTooLarge(format!(
"content is too large: maximum allowed is {} bytes",
config.ingest_max_content_bytes
)));
}
if let Some(content) = content
&& content.len() > config.ingest_max_content_bytes
{
return Err(IngestValidationError::PayloadTooLarge(format!(
"content is too large: maximum allowed is {} bytes",
config.ingest_max_content_bytes
)));
}
if ctx.len() > config.ingest_max_context_bytes {
+1 -1
View File
@@ -1,4 +1,4 @@
pub use minijinja::{path_loader, Environment, Value};
pub use minijinja::{Environment, Value, path_loader};
pub use minijinja_autoreload::AutoReloader;
pub use minijinja_contrib;
pub use minijinja_embed;