feat: database migrations

This commit is contained in:
Per Stark
2025-05-04 21:16:09 +02:00
parent 43fcf6032d
commit 4d1d1eb22c
28 changed files with 880 additions and 218 deletions

View File

@@ -38,6 +38,7 @@ minijinja = { version = "2.5.0", features = ["loader", "multi_template"] }
minijinja-autoreload = "2.5.0"
minijinja-embed = { version = "2.8.0" }
minijinja-contrib = { version = "2.6.0", features = ["datetime", "timezone"] }
include_dir = "0.7.4"
[features]
test-utils = []

View File

@@ -0,0 +1,15 @@
# Seed data: creates the two singleton records with fixed ids ('current').
# Presumably replaces Analytics::ensure_initialized and SystemSettings::ensure_initialized — confirm.
# NOTE(review): these are plain CREATE statements, which presumably error if the record already
# exists — confirm this script is only ever executed once per database.
# Create the 'analytics:current' record with both counters at zero
CREATE analytics:current CONTENT {
page_loads: 0,
visitors: 0,
};
# Create the 'system_settings:current' record with the default settings
# NOTE(review): both *_system_prompt values look like unfilled placeholders — confirm real
# default prompts are supplied before this runs against a production database.
CREATE system_settings:current CONTENT {
registrations_enabled: true,
require_email_verification: false,
query_model: "gpt-4o-mini",
processing_model: "gpt-4o-mini",
query_system_prompt: "<Your default query prompt here>",
ingestion_system_prompt: "<Your default ingress prompt here>"
};

View File

@@ -0,0 +1 @@
{"schemas":"--- original\n+++ modified\n@@ -147,6 +147,7 @@\n\n DEFINE FIELD OVERWRITE script_name ON script_migration TYPE string;\n DEFINE FIELD OVERWRITE executed_at ON script_migration TYPE datetime VALUE time::now() READONLY;\n+\n # Defines the schema for the 'system_settings' table.\n\n DEFINE TABLE IF NOT EXISTS system_settings SCHEMALESS;\n","events":null}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,7 @@
# Defines the schema for the 'analytics' table.
# IF NOT EXISTS leaves an already-present definition untouched, so re-running this file is a no-op.
DEFINE TABLE IF NOT EXISTS analytics SCHEMALESS;
# Counter fields from the Analytics struct, both typed as number
DEFINE FIELD IF NOT EXISTS page_loads ON analytics TYPE number;
DEFINE FIELD IF NOT EXISTS visitors ON analytics TYPE number;

View File

@@ -0,0 +1,6 @@
# Defines the database-level record access method 'account' (signup and signin rules).
# This mirrors the logic previously in SurrealDbClient::setup_auth.
# SIGNUP creates a non-anonymous user and stores an argon2 hash of $password, never the plain text.
# SIGNIN selects the user whose email matches and whose stored hash verifies against $password.
DEFINE ACCESS IF NOT EXISTS account ON DATABASE TYPE RECORD
SIGNUP ( CREATE user SET email = $email, password = crypto::argon2::generate($password), anonymous = false, user_id = $user_id) # $user_id comes from the signup variables; supply it if user_id is needed
SIGNIN ( SELECT * FROM user WHERE email = $email AND crypto::argon2::compare(password, $password) );

View File

@@ -0,0 +1,15 @@
# Defines the schema for the 'conversation' table.
DEFINE TABLE IF NOT EXISTS conversation SCHEMALESS;
# Standard timestamp fields
# NOTE(review): timestamps are typed as string, so ordering on them is lexicographic;
# presumably the values are ISO-8601/RFC 3339 strings — confirm.
DEFINE FIELD IF NOT EXISTS created_at ON conversation TYPE string;
DEFINE FIELD IF NOT EXISTS updated_at ON conversation TYPE string;
# Custom fields from the Conversation struct
DEFINE FIELD IF NOT EXISTS user_id ON conversation TYPE string;
DEFINE FIELD IF NOT EXISTS title ON conversation TYPE string;
# Indexes based on query patterns (get_complete_conversation ownership check, get_user_conversations)
DEFINE INDEX IF NOT EXISTS conversation_user_id_idx ON conversation FIELDS user_id;
DEFINE INDEX IF NOT EXISTS conversation_created_at_idx ON conversation FIELDS created_at; # For get_user_conversations ORDER BY

19
common/schemas/file.surql Normal file
View File

@@ -0,0 +1,19 @@
# Defines the schema for the 'file' table (used by FileInfo).
DEFINE TABLE IF NOT EXISTS file SCHEMALESS;
# Standard timestamp fields, stored as strings
DEFINE FIELD IF NOT EXISTS created_at ON file TYPE string;
DEFINE FIELD IF NOT EXISTS updated_at ON file TYPE string;
# Custom fields from the FileInfo struct
DEFINE FIELD IF NOT EXISTS sha256 ON file TYPE string;
DEFINE FIELD IF NOT EXISTS path ON file TYPE string;
DEFINE FIELD IF NOT EXISTS file_name ON file TYPE string;
DEFINE FIELD IF NOT EXISTS mime_type ON file TYPE string;
DEFINE FIELD IF NOT EXISTS user_id ON file TYPE string;
# Indexes based on usage (get_by_sha, potentially user lookups)
# sha256 is UNIQUE, following the duplicate check in FileInfo::new.
# NOTE(review): the uniqueness covers sha256 alone, not (sha256, user_id), so two users
# cannot each hold a record for the same content — confirm this is intended.
DEFINE INDEX IF NOT EXISTS file_sha256_idx ON file FIELDS sha256 UNIQUE;
DEFINE INDEX IF NOT EXISTS file_user_id_idx ON file FIELDS user_id;

View File

@@ -0,0 +1,19 @@
# Defines the schema for the 'ingestion_task' table (used by IngestionTask).
# The table name must match the one given to stored_object!(IngestionTask, "ingestion_task", ...);
# defining it on the former name 'job' would leave the real table without field types or indexes.
DEFINE TABLE IF NOT EXISTS ingestion_task SCHEMALESS;
# Standard timestamp fields, stored as strings
DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE string;
DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE string;
# Custom fields from the IngestionTask struct
# IngestionPayload is complex, stored as an object
DEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object;
# IngestionTaskStatus can hold data (InProgress), stored as an object
# NOTE(review): assumes every status variant, including data-less ones such as Cancelled,
# serializes to an object rather than a bare string — confirm against the enum's serde attributes.
DEFINE FIELD IF NOT EXISTS status ON ingestion_task TYPE object;
DEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string;
# Indexes formerly created in build_indexes and useful for get_unfinished_tasks.
# The idx_job_* names are kept from build_indexes so anything referring to them by name still matches.
DEFINE INDEX IF NOT EXISTS idx_job_status ON ingestion_task FIELDS status;
DEFINE INDEX IF NOT EXISTS idx_job_user ON ingestion_task FIELDS user_id;
DEFINE INDEX IF NOT EXISTS idx_job_created ON ingestion_task FIELDS created_at;

View File

@@ -0,0 +1,30 @@
# Defines the schema for the 'knowledge_entity' table.
DEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMALESS;
# Standard timestamp fields, stored as strings
DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity TYPE string;
DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity TYPE string;
# Custom fields from the KnowledgeEntity struct
DEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity TYPE string;
DEFINE FIELD IF NOT EXISTS name ON knowledge_entity TYPE string;
DEFINE FIELD IF NOT EXISTS description ON knowledge_entity TYPE string;
# KnowledgeEntityType is an enum, stored as a string
DEFINE FIELD IF NOT EXISTS entity_type ON knowledge_entity TYPE string;
# metadata is Option<serde_json::Value>, stored as an optional object
DEFINE FIELD IF NOT EXISTS metadata ON knowledge_entity TYPE option<object>;
# embedding is declared as a plain array of floats; its vector properties come from idx_embedding_entities
DEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity TYPE array<float>;
# Owner of the entity
DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity TYPE string;
# Indexes based on build_indexes and query patterns
# Vector index over embedding: HNSW with 1536 dimensions
DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;
DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;
DEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;
DEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;
DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at; # For get_latest_knowledge_entities

View File

@@ -0,0 +1,19 @@
# Defines the schema for the 'message' table.
DEFINE TABLE IF NOT EXISTS message SCHEMALESS;
# Standard timestamp fields, stored as strings
DEFINE FIELD IF NOT EXISTS created_at ON message TYPE string;
DEFINE FIELD IF NOT EXISTS updated_at ON message TYPE string;
# Custom fields from the Message struct
DEFINE FIELD IF NOT EXISTS conversation_id ON message TYPE string;
# MessageRole is an enum, stored as a string
DEFINE FIELD IF NOT EXISTS role ON message TYPE string;
DEFINE FIELD IF NOT EXISTS content ON message TYPE string;
# references is Option<Vec<String>>, stored as an optional array of strings
DEFINE FIELD IF NOT EXISTS references ON message TYPE option<array<string>>;
# Indexes based on query patterns (get_complete_conversation)
DEFINE INDEX IF NOT EXISTS message_conversation_id_idx ON message FIELDS conversation_id;
DEFINE INDEX IF NOT EXISTS message_updated_at_idx ON message FIELDS updated_at; # For ORDER BY

View File

@@ -0,0 +1,19 @@
# Defines the 'relates_to' edge table for KnowledgeRelationships.
# Each edge connects one knowledge_entity record to another (TYPE RELATION FROM ... TO ...).
# The table is SCHEMALESS so the contents of metadata may vary;
# SCHEMAFULL would require every field, including those inside metadata, to be defined.
DEFINE TABLE IF NOT EXISTS relates_to SCHEMALESS TYPE RELATION FROM knowledge_entity TO knowledge_entity;
# Metadata carried on the edge
# RelationshipMetadata is a struct, stored as an object
DEFINE FIELD IF NOT EXISTS metadata ON relates_to TYPE object;
# Optional stricter typing of the metadata sub-fields (requires SCHEMAFULL on the table); currently disabled:
# DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;
# DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;
# DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;
# Indexes on nested metadata fields, based on query patterns (delete_relationships_by_source_id, get_knowledge_relationships)
DEFINE INDEX IF NOT EXISTS relates_to_metadata_source_id_idx ON relates_to FIELDS metadata.source_id;
DEFINE INDEX IF NOT EXISTS relates_to_metadata_user_id_idx ON relates_to FIELDS metadata.user_id;

View File

@@ -0,0 +1,7 @@
# Defines the 'script_migration' table: one record per executed migration script.
# NOTE(review): presumably the bookkeeping table read by the surrealdb-migrations runner — confirm.
# Unlike the other schema files this uses OVERWRITE, so the definition is replaced on every run.
# Permissions: records can be selected freely; create, update and delete are denied at record level.
DEFINE TABLE OVERWRITE script_migration SCHEMAFULL
PERMISSIONS
FOR select FULL
FOR create, update, delete NONE;
# Name of the migration script that was executed
DEFINE FIELD OVERWRITE script_name ON script_migration TYPE string;
# Execution timestamp: set from time::now() and READONLY, so it cannot be changed afterwards
DEFINE FIELD OVERWRITE executed_at ON script_migration TYPE datetime VALUE time::now() READONLY;

View File

@@ -0,0 +1,11 @@
# Defines the schema for the 'system_settings' table.
DEFINE TABLE IF NOT EXISTS system_settings SCHEMALESS;
# Custom fields from the SystemSettings struct
# Registration and verification toggles
DEFINE FIELD IF NOT EXISTS registrations_enabled ON system_settings TYPE bool;
DEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;
# Model identifiers for querying and for processing
DEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;
# System prompts for querying and for ingestion
DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;

View File

@@ -0,0 +1,23 @@
# Defines the schema for the 'text_chunk' table.
DEFINE TABLE IF NOT EXISTS text_chunk SCHEMALESS;
# Standard timestamp fields, stored as strings
DEFINE FIELD IF NOT EXISTS created_at ON text_chunk TYPE string;
DEFINE FIELD IF NOT EXISTS updated_at ON text_chunk TYPE string;
# Custom fields from the TextChunk struct
DEFINE FIELD IF NOT EXISTS source_id ON text_chunk TYPE string;
DEFINE FIELD IF NOT EXISTS chunk ON text_chunk TYPE string;
# embedding is declared as a plain array of floats; its vector properties come from idx_embedding_chunks
DEFINE FIELD IF NOT EXISTS embedding ON text_chunk TYPE array<float>;
# Owner of the chunk
DEFINE FIELD IF NOT EXISTS user_id ON text_chunk TYPE string;
# Indexes based on build_indexes and query patterns (delete_by_source_id)
# Vector index over embedding: HNSW with 1536 dimensions
DEFINE INDEX IF NOT EXISTS idx_embedding_chunks ON text_chunk FIELDS embedding HNSW DIMENSION 1536;
DEFINE INDEX IF NOT EXISTS text_chunk_source_id_idx ON text_chunk FIELDS source_id;
DEFINE INDEX IF NOT EXISTS text_chunk_user_id_idx ON text_chunk FIELDS user_id;

View File

@@ -0,0 +1,22 @@
# Defines the schema for the 'text_content' table.
DEFINE TABLE IF NOT EXISTS text_content SCHEMALESS;
# Standard timestamp fields, stored as strings
DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS updated_at ON text_content TYPE string;
# Custom fields from the TextContent struct
DEFINE FIELD IF NOT EXISTS text ON text_content TYPE string;
# FileInfo is a struct, stored as an optional object
DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;
# UrlInfo is a struct, stored as an optional object
DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;
DEFINE FIELD IF NOT EXISTS instructions ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;
DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;
# Indexes based on query patterns (get_latest_text_contents, get_text_contents_by_category)
DEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;
DEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;
DEFINE INDEX IF NOT EXISTS text_content_category_idx ON text_content FIELDS category;

20
common/schemas/user.surql Normal file
View File

@@ -0,0 +1,20 @@
# Defines the schema for the 'user' table.
# NOTE: Authentication scope and access rules are defined in auth.surql
# NOTE(review): the SIGNUP clause of the 'account' access method sets only email, password,
# anonymous and user_id, while created_at, updated_at, admin and timezone are declared
# non-optional here — presumably that CREATE fails type checks; confirm signup uses another path.
DEFINE TABLE IF NOT EXISTS user SCHEMALESS;
# Standard timestamp fields, stored as strings
DEFINE FIELD IF NOT EXISTS created_at ON user TYPE string;
DEFINE FIELD IF NOT EXISTS updated_at ON user TYPE string;
# Custom fields from the User struct
DEFINE FIELD IF NOT EXISTS email ON user TYPE string;
DEFINE FIELD IF NOT EXISTS password ON user TYPE string; # Stores the hashed password
DEFINE FIELD IF NOT EXISTS anonymous ON user TYPE bool;
# api_key is optional: absent until a key is set
DEFINE FIELD IF NOT EXISTS api_key ON user TYPE option<string>;
DEFINE FIELD IF NOT EXISTS admin ON user TYPE bool;
DEFINE FIELD IF NOT EXISTS timezone ON user TYPE string;
# Indexes based on query patterns (find_by_email, find_by_api_key)
# The UNIQUE email index takes over from the 'unique_name' index formerly created in setup_auth
DEFINE INDEX IF NOT EXISTS user_email_idx ON user FIELDS email UNIQUE;
DEFINE INDEX IF NOT EXISTS user_api_key_idx ON user FIELDS api_key;

View File

@@ -1,15 +1,18 @@
use crate::error::AppError;
use super::types::{analytics::Analytics, system_settings::SystemSettings, StoredObject};
use crate::error::AppError;
use axum_session::{SessionConfig, SessionError, SessionStore};
use axum_session_surreal::SessionSurrealPool;
use futures::Stream;
use include_dir::{include_dir, Dir};
use std::{ops::Deref, sync::Arc};
use surrealdb::{
engine::any::{connect, Any},
opt::auth::Root,
Error, Notification, Surreal,
};
use surrealdb_migrations::MigrationRunner;
static MIGRATIONS_DIR: Dir<'_> = include_dir!("$CARGO_MANIFEST_DIR/");
#[derive(Clone)]
pub struct SurrealDbClient {
@@ -56,44 +59,60 @@ impl SurrealDbClient {
.await
}
pub async fn ensure_initialized(&self) -> Result<(), AppError> {
Self::build_indexes(self).await?;
Self::setup_auth(self).await?;
Analytics::ensure_initialized(self).await?;
SystemSettings::ensure_initialized(self).await?;
/// Applies all pending database migrations found in the embedded MIGRATIONS_DIR.
///
/// This function should be called during application startup, after connecting to
/// the database and selecting the appropriate namespace and database, but before
/// the application starts performing operations that rely on the schema.
pub async fn apply_migrations(&self) -> Result<(), AppError> {
// Instantiate the runner, load embedded files, and run 'up'
MigrationRunner::new(&self.client)
.load_files(&MIGRATIONS_DIR)
.up()
.await
.map_err(|e| AppError::Processing(e.to_string()))?;
Ok(())
}
pub async fn setup_auth(&self) -> Result<(), Error> {
self.client.query(
"DEFINE TABLE user SCHEMALESS;
DEFINE INDEX unique_name ON TABLE user FIELDS email UNIQUE;
DEFINE ACCESS account ON DATABASE TYPE RECORD
SIGNUP ( CREATE user SET email = $email, password = crypto::argon2::generate($password), anonymous = false, user_id = $user_id)
SIGNIN ( SELECT * FROM user WHERE email = $email AND crypto::argon2::compare(password, $password) );",
)
.await?;
Ok(())
}
// pub async fn ensure_initialized(&self) -> Result<(), AppError> {
// Self::build_indexes(self).await?;
// Self::setup_auth(self).await?;
pub async fn build_indexes(&self) -> Result<(), Error> {
self.client.query("DEFINE INDEX idx_embedding_chunks ON text_chunk FIELDS embedding HNSW DIMENSION 1536").await?;
self.client.query("DEFINE INDEX idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536").await?;
// Analytics::ensure_initialized(self).await?;
// SystemSettings::ensure_initialized(self).await?;
self.client
.query("DEFINE INDEX idx_job_status ON job FIELDS status")
.await?;
self.client
.query("DEFINE INDEX idx_job_user ON job FIELDS user_id")
.await?;
self.client
.query("DEFINE INDEX idx_job_created ON job FIELDS created_at")
.await?;
// Ok(())
// }
Ok(())
}
// pub async fn setup_auth(&self) -> Result<(), Error> {
// self.client.query(
// "DEFINE TABLE user SCHEMALESS;
// DEFINE INDEX unique_name ON TABLE user FIELDS email UNIQUE;
// DEFINE ACCESS account ON DATABASE TYPE RECORD
// SIGNUP ( CREATE user SET email = $email, password = crypto::argon2::generate($password), anonymous = false, user_id = $user_id)
// SIGNIN ( SELECT * FROM user WHERE email = $email AND crypto::argon2::compare(password, $password) );",
// )
// .await?;
// Ok(())
// }
// pub async fn build_indexes(&self) -> Result<(), Error> {
// self.client.query("DEFINE INDEX idx_embedding_chunks ON text_chunk FIELDS embedding HNSW DIMENSION 1536").await?;
// self.client.query("DEFINE INDEX idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536").await?;
// self.client
// .query("DEFINE INDEX idx_job_status ON job FIELDS status")
// .await?;
// self.client
// .query("DEFINE INDEX idx_job_user ON job FIELDS user_id")
// .await?;
// self.client
// .query("DEFINE INDEX idx_job_created ON job FIELDS created_at")
// .await?;
// Ok(())
// }
pub async fn rebuild_indexes(&self) -> Result<(), Error> {
self.client
@@ -222,7 +241,7 @@ mod tests {
.expect("Failed to start in-memory surrealdb");
// Call your initialization
db.ensure_initialized()
db.apply_migrations()
.await
.expect("Failed to initialize schema");
@@ -268,25 +287,15 @@ mod tests {
}
#[tokio::test]
async fn test_setup_auth() {
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string(); // ensures isolation per test run
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("Failed to start in-memory surrealdb");
// Should not panic or fail
db.setup_auth().await.expect("Failed to setup auth");
}
#[tokio::test]
async fn test_build_indexes() {
async fn test_applying_migrations() {
let namespace = "test_ns";
let database = &Uuid::new_v4().to_string();
let db = SurrealDbClient::memory(namespace, database)
.await
.expect("Failed to start in-memory surrealdb");
db.build_indexes().await.expect("Failed to build indexes");
db.apply_migrations()
.await
.expect("Failed to build indexes");
}
}

View File

@@ -18,7 +18,7 @@ pub enum IngestionTaskStatus {
Cancelled,
}
stored_object!(IngestionTask, "job", {
stored_object!(IngestionTask, "ingestion_task", {
content: IngestionPayload,
status: IngestionTaskStatus,
user_id: String

View File

@@ -199,7 +199,7 @@ impl User {
.client
.query(
"UPDATE type::thing('user', $id)
SET api_key = NULL
SET api_key = test_string_nullish
RETURN AFTER",
)
.bind(("id", id.to_owned()))
@@ -520,9 +520,9 @@ mod tests {
.await
.expect("Failed to start in-memory surrealdb");
db.ensure_initialized()
db.apply_migrations()
.await
.expect("Failed to setup the systemsettings");
.expect("Failed to setup the migrations");
db
}