feat: task archive

fix: simplified
This commit is contained in:
Per Stark
2025-10-13 15:02:22 +02:00
parent 41fc7bb99c
commit aa0b1462a1
7 changed files with 404 additions and 73 deletions

View File

@@ -73,7 +73,6 @@ pub struct TaskErrorInfo {
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
enum TaskTransition { enum TaskTransition {
Reserve,
StartProcessing, StartProcessing,
Succeed, Succeed,
Fail, Fail,
@@ -85,7 +84,6 @@ enum TaskTransition {
impl TaskTransition { impl TaskTransition {
fn as_str(&self) -> &'static str { fn as_str(&self) -> &'static str {
match self { match self {
TaskTransition::Reserve => "reserve",
TaskTransition::StartProcessing => "start_processing", TaskTransition::StartProcessing => "start_processing",
TaskTransition::Succeed => "succeed", TaskTransition::Succeed => "succeed",
TaskTransition::Fail => "fail", TaskTransition::Fail => "fail",
@@ -162,53 +160,6 @@ fn invalid_transition(state: &TaskState, event: TaskTransition) -> AppError {
)) ))
} }
/// Resolves the state a task ends up in when `event` is applied to `state`.
///
/// Every permitted `(state, event)` pair is replayed against the matching
/// `lifecycle` machine, and the machine's own verdict decides whether the
/// transition is accepted.
///
/// # Errors
///
/// Returns the error produced by `invalid_transition` when the pair is not
/// listed below, or when the lifecycle machine rejects the transition.
fn compute_next_state(state: &TaskState, event: TaskTransition) -> Result<TaskState, AppError> {
    use lifecycle::*;

    // `None` means "rejected"; the error is built once, after the match.
    let accepted = match (state, event) {
        (TaskState::Pending, TaskTransition::Reserve) => {
            pending().reserve().ok().map(|_| TaskState::Reserved)
        }
        (TaskState::Failed, TaskTransition::Reserve) => {
            failed().reserve().ok().map(|_| TaskState::Reserved)
        }
        (TaskState::Reserved, TaskTransition::StartProcessing) => reserved()
            .start_processing()
            .ok()
            .map(|_| TaskState::Processing),
        (TaskState::Processing, TaskTransition::Succeed) => {
            processing().succeed().ok().map(|_| TaskState::Succeeded)
        }
        (TaskState::Processing, TaskTransition::Fail) => {
            processing().fail().ok().map(|_| TaskState::Failed)
        }
        (TaskState::Pending, TaskTransition::Cancel) => {
            pending().cancel().ok().map(|_| TaskState::Cancelled)
        }
        (TaskState::Reserved, TaskTransition::Cancel) => {
            reserved().cancel().ok().map(|_| TaskState::Cancelled)
        }
        (TaskState::Processing, TaskTransition::Cancel) => {
            processing().cancel().ok().map(|_| TaskState::Cancelled)
        }
        (TaskState::Failed, TaskTransition::DeadLetter) => {
            failed().deadletter().ok().map(|_| TaskState::DeadLetter)
        }
        (TaskState::Reserved, TaskTransition::Release) => {
            reserved().release().ok().map(|_| TaskState::Pending)
        }
        _ => None,
    };

    accepted.ok_or_else(|| invalid_transition(state, event))
}
stored_object!(IngestionTask, "ingestion_task", { stored_object!(IngestionTask, "ingestion_task", {
content: IngestionPayload, content: IngestionPayload,
state: TaskState, state: TaskState,
@@ -284,8 +235,8 @@ impl IngestionTask {
now: chrono::DateTime<chrono::Utc>, now: chrono::DateTime<chrono::Utc>,
lease_duration: Duration, lease_duration: Duration,
) -> Result<Option<IngestionTask>, AppError> { ) -> Result<Option<IngestionTask>, AppError> {
debug_assert!(compute_next_state(&TaskState::Pending, TaskTransition::Reserve).is_ok()); debug_assert!(lifecycle::pending().reserve().is_ok());
debug_assert!(compute_next_state(&TaskState::Failed, TaskTransition::Reserve).is_ok()); debug_assert!(lifecycle::failed().reserve().is_ok());
const CLAIM_QUERY: &str = r#" const CLAIM_QUERY: &str = r#"
UPDATE ( UPDATE (
@@ -348,9 +299,6 @@ impl IngestionTask {
} }
pub async fn mark_processing(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> { pub async fn mark_processing(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
let next = compute_next_state(&self.state, TaskTransition::StartProcessing)?;
debug_assert_eq!(next, TaskState::Processing);
const START_PROCESSING_QUERY: &str = r#" const START_PROCESSING_QUERY: &str = r#"
UPDATE type::thing($table, $id) UPDATE type::thing($table, $id)
SET state = $processing, SET state = $processing,
@@ -377,9 +325,6 @@ impl IngestionTask {
} }
pub async fn mark_succeeded(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> { pub async fn mark_succeeded(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
let next = compute_next_state(&self.state, TaskTransition::Succeed)?;
debug_assert_eq!(next, TaskState::Succeeded);
const COMPLETE_QUERY: &str = r#" const COMPLETE_QUERY: &str = r#"
UPDATE type::thing($table, $id) UPDATE type::thing($table, $id)
SET state = $succeeded, SET state = $succeeded,
@@ -416,9 +361,6 @@ impl IngestionTask {
retry_delay: Duration, retry_delay: Duration,
db: &SurrealDbClient, db: &SurrealDbClient,
) -> Result<IngestionTask, AppError> { ) -> Result<IngestionTask, AppError> {
let next = compute_next_state(&self.state, TaskTransition::Fail)?;
debug_assert_eq!(next, TaskState::Failed);
let now = chrono::Utc::now(); let now = chrono::Utc::now();
let retry_at = now let retry_at = now
+ ChronoDuration::from_std(retry_delay).unwrap_or_else(|_| ChronoDuration::seconds(30)); + ChronoDuration::from_std(retry_delay).unwrap_or_else(|_| ChronoDuration::seconds(30));
@@ -460,9 +402,6 @@ impl IngestionTask {
error: TaskErrorInfo, error: TaskErrorInfo,
db: &SurrealDbClient, db: &SurrealDbClient,
) -> Result<IngestionTask, AppError> { ) -> Result<IngestionTask, AppError> {
let next = compute_next_state(&self.state, TaskTransition::DeadLetter)?;
debug_assert_eq!(next, TaskState::DeadLetter);
const DEAD_LETTER_QUERY: &str = r#" const DEAD_LETTER_QUERY: &str = r#"
UPDATE type::thing($table, $id) UPDATE type::thing($table, $id)
SET state = $dead, SET state = $dead,
@@ -495,8 +434,6 @@ impl IngestionTask {
} }
pub async fn mark_cancelled(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> { pub async fn mark_cancelled(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
compute_next_state(&self.state, TaskTransition::Cancel)?;
const CANCEL_QUERY: &str = r#" const CANCEL_QUERY: &str = r#"
UPDATE type::thing($table, $id) UPDATE type::thing($table, $id)
SET state = $cancelled, SET state = $cancelled,
@@ -530,8 +467,6 @@ impl IngestionTask {
} }
pub async fn release(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> { pub async fn release(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
compute_next_state(&self.state, TaskTransition::Release)?;
const RELEASE_QUERY: &str = r#" const RELEASE_QUERY: &str = r#"
UPDATE type::thing($table, $id) UPDATE type::thing($table, $id)
SET state = $pending, SET state = $pending,
@@ -708,4 +643,86 @@ mod tests {
assert_eq!(dead.state, TaskState::DeadLetter); assert_eq!(dead.state, TaskState::DeadLetter);
assert_eq!(dead.error_message.as_deref(), Some("failed")); assert_eq!(dead.error_message.as_deref(), Some("failed"));
} }
/// Starting processing on a task that was only created and stored must be
/// rejected with a validation error naming the `Pending -> start_processing`
/// transition.
#[tokio::test]
async fn test_mark_processing_requires_reservation() {
    let db = memory_db().await;
    let owner = "user123";

    // The task is stored straight after creation; nothing reserves it first.
    let task = IngestionTask::new(create_payload(owner), owner.to_string()).await;
    db.store_item(task.clone()).await.expect("store");

    let outcome = task.mark_processing(&db).await;
    let err = outcome.expect_err("processing should fail without reservation");

    if let AppError::Validation(message) = &err {
        assert!(
            message.contains("Pending -> start_processing"),
            "unexpected message: {message}"
        );
    } else {
        panic!("expected validation error, got {err:?}");
    }
}
/// Marking a task as failed when it was only created and stored must be
/// rejected with a validation error naming the `Pending -> fail` transition.
#[tokio::test]
async fn test_mark_failed_requires_processing() {
    let db = memory_db().await;
    let owner = "user123";

    // The task is stored straight after creation; it never enters processing.
    let task = IngestionTask::new(create_payload(owner), owner.to_string()).await;
    db.store_item(task.clone()).await.expect("store");

    let failure = TaskErrorInfo {
        code: None,
        message: "boom".into(),
    };
    let outcome = task
        .mark_failed(failure, Duration::from_secs(30), &db)
        .await;
    let err = outcome.expect_err("failing should require processing state");

    if let AppError::Validation(message) = &err {
        assert!(
            message.contains("Pending -> fail"),
            "unexpected message: {message}"
        );
    } else {
        panic!("expected validation error, got {err:?}");
    }
}
/// Releasing a task that was only created and stored must be rejected with a
/// validation error naming the `Pending -> release` transition.
#[tokio::test]
async fn test_release_requires_reservation() {
    let db = memory_db().await;
    let owner = "user123";

    // The task is stored straight after creation; nothing reserves it first.
    let task = IngestionTask::new(create_payload(owner), owner.to_string()).await;
    db.store_item(task.clone()).await.expect("store");

    let outcome = task.release(&db).await;
    let err = outcome.expect_err("release should require reserved state");

    if let AppError::Validation(message) = &err {
        assert!(
            message.contains("Pending -> release"),
            "unexpected message: {message}"
        );
    } else {
        panic!("expected validation error, got {err:?}");
    }
}
} }

View File

@@ -559,6 +559,25 @@ impl User {
Ok(jobs) Ok(jobs)
} }
/// Fetches every ingestion task belonging to `user_id`, newest first.
///
/// The query filters on `user_id` and sorts by `created_at` in descending
/// order.
///
/// # Errors
///
/// Propagates, as `AppError`, any failure from running the query or from
/// decoding its first result set.
pub async fn get_all_ingestion_tasks(
    user_id: &str,
    db: &SurrealDbClient,
) -> Result<Vec<IngestionTask>, AppError> {
    let mut response = db
        .query(
            "SELECT * FROM type::table($table)
WHERE user_id = $user_id
ORDER BY created_at DESC",
        )
        .bind(("table", IngestionTask::table_name()))
        .bind(("user_id", user_id.to_owned()))
        .await?;

    // Only one statement is sent, so its rows are in result slot 0.
    let tasks: Vec<IngestionTask> = response.take(0)?;
    Ok(tasks)
}
/// Validate and delete job /// Validate and delete job
pub async fn validate_and_delete_job( pub async fn validate_and_delete_job(
id: &str, id: &str,
@@ -771,6 +790,49 @@ mod tests {
assert_eq!(unfinished_ids.len(), 3); assert_eq!(unfinished_ids.len(), 3);
} }
/// `get_all_ingestion_tasks` must return only the requesting user's tasks,
/// with the most recently created one first.
#[tokio::test]
async fn test_get_all_ingestion_tasks_returns_sorted() {
    let db = setup_test_db().await;
    let user_id = "archive_user";
    let other_user_id = "other_user";

    let text_payload = |text: &str, owner: &str| IngestionPayload::Text {
        text: text.to_string(),
        context: "Context".to_string(),
        category: "Category".to_string(),
        user_id: owner.to_string(),
    };

    // Oldest task: back-dated by one minute so the ordering is unambiguous.
    let mut first = IngestionTask::new(text_payload("One", user_id), user_id.to_string()).await;
    let backdated = first.created_at - chrono::Duration::minutes(1);
    first.created_at = backdated;
    first.updated_at = backdated;
    first.state = TaskState::Succeeded;
    let first_id = first.id.clone();
    db.store_item(first).await.expect("store first");

    // Latest task: keeps the creation time assigned by `IngestionTask::new`.
    let mut second = IngestionTask::new(text_payload("One", user_id), user_id.to_string()).await;
    second.state = TaskState::Processing;
    let second_id = second.id.clone();
    db.store_item(second).await.expect("store second");

    // A task owned by someone else must not show up in the result.
    let other_task = IngestionTask::new(
        text_payload("Other", other_user_id),
        other_user_id.to_string(),
    )
    .await;
    db.store_item(other_task).await.expect("store other");

    let tasks = User::get_all_ingestion_tasks(user_id, &db)
        .await
        .expect("fetch all tasks");

    assert_eq!(tasks.len(), 2);
    assert_eq!(tasks[0].id, second_id); // newest first
    assert_eq!(tasks[1].id, first_id);
}
#[tokio::test] #[tokio::test]
async fn test_find_by_email() { async fn test_find_by_email() {
// Setup test database // Setup test database

File diff suppressed because one or more lines are too long

View File

@@ -4,6 +4,7 @@ use axum::{
http::{header, HeaderMap, HeaderValue, StatusCode}, http::{header, HeaderMap, HeaderValue, StatusCode},
response::IntoResponse, response::IntoResponse,
}; };
use chrono::{DateTime, Utc};
use futures::try_join; use futures::try_join;
use serde::Serialize; use serde::Serialize;
@@ -139,6 +140,32 @@ pub struct ActiveJobsData {
pub user: User, pub user: User,
} }
/// Flattened, serializable view of one ingestion task as handed to the
/// task archive template.
#[derive(Serialize)]
struct TaskArchiveEntry {
/// Identifier of the task.
id: String,
/// Display form of the task state (from `display_label()`).
state_label: String,
/// Raw form of the task state (from `as_str()`).
state_raw: String,
/// Attempt counter copied from the task.
attempts: u32,
/// Attempt limit copied from the task.
max_attempts: u32,
/// Creation timestamp of the task.
created_at: DateTime<Utc>,
/// Timestamp of the task's last update.
updated_at: DateTime<Utc>,
/// Scheduling timestamp copied from the task.
scheduled_at: DateTime<Utc>,
/// Lock timestamp, if the task carries one.
locked_at: Option<DateTime<Utc>>,
/// Timestamp of the last recorded error, if any.
last_error_at: Option<DateTime<Utc>>,
/// Message of the last recorded error, if any.
error_message: Option<String>,
/// Worker identifier, if the task carries one.
worker_id: Option<String>,
/// Priority value copied from the task.
priority: i32,
/// Lease duration in seconds, copied from the task.
lease_duration_secs: i64,
/// Short label for the payload variant ("Text", "URL" or "File").
content_kind: String,
/// One-line description of the payload produced by `summarize_task_content`.
content_summary: String,
}
/// Template context for the task archive modal.
#[derive(Serialize)]
struct TaskArchiveData {
/// The authenticated user whose tasks are listed.
user: User,
/// Archive rows, in the order returned by the task query.
tasks: Vec<TaskArchiveEntry>,
}
pub async fn delete_job( pub async fn delete_job(
State(state): State<HtmlState>, State(state): State<HtmlState>,
RequireUser(user): RequireUser, RequireUser(user): RequireUser,
@@ -173,6 +200,70 @@ pub async fn show_active_jobs(
)) ))
} }
/// Renders the task archive modal for the authenticated user.
///
/// Loads every ingestion task belonging to `user`, converts each one into a
/// [`TaskArchiveEntry`] and passes the list to the
/// `dashboard/task_archive_modal.html` template.
///
/// # Errors
///
/// Returns an `HtmlError` when the tasks cannot be loaded from the database.
pub async fn show_task_archive(
    State(state): State<HtmlState>,
    RequireUser(user): RequireUser,
) -> Result<impl IntoResponse, HtmlError> {
    let tasks = User::get_all_ingestion_tasks(&user.id, &state.db).await?;

    let entries: Vec<TaskArchiveEntry> = tasks
        .into_iter()
        .map(|task| {
            // Everything that only needs a borrow is computed first, so the
            // owned fields can be moved out of `task` instead of cloned.
            let (content_kind, content_summary) = summarize_task_content(&task);
            let state_label = task.state.display_label().to_string();
            let state_raw = task.state.as_str().to_string();

            TaskArchiveEntry {
                id: task.id,
                state_label,
                state_raw,
                attempts: task.attempts,
                max_attempts: task.max_attempts,
                created_at: task.created_at,
                updated_at: task.updated_at,
                scheduled_at: task.scheduled_at,
                locked_at: task.locked_at,
                last_error_at: task.last_error_at,
                error_message: task.error_message,
                worker_id: task.worker_id,
                priority: task.priority,
                lease_duration_secs: task.lease_duration_secs,
                content_kind,
                content_summary,
            }
        })
        .collect();

    Ok(TemplateResponse::new_template(
        "dashboard/task_archive_modal.html",
        TaskArchiveData {
            user,
            tasks: entries,
        },
    ))
}
/// Describes a task's payload as a `(kind, summary)` pair for display.
///
/// * `Text` payloads yield `"Text"` and the text passed through
///   `truncate_summary` with a limit of 80.
/// * `Url` payloads yield `"URL"` and the URL rendered as a string.
/// * `File` payloads yield `"File"` and the stored file name.
fn summarize_task_content(task: &IngestionTask) -> (String, String) {
    use common::storage::types::ingestion_payload::IngestionPayload;

    let (kind, summary) = match &task.content {
        IngestionPayload::Text { text, .. } => ("Text", truncate_summary(text, 80)),
        IngestionPayload::Url { url, .. } => ("URL", url.to_string()),
        IngestionPayload::File { file_info, .. } => ("File", file_info.file_name.clone()),
    };

    (kind.to_string(), summary)
}
/// Shortens `input` to at most `max_chars` characters for display.
///
/// Input that already fits is returned unchanged. Longer input is cut after
/// `max_chars` characters and an ellipsis (`…`, U+2026) is appended so the
/// reader can tell the text was shortened; the result is therefore
/// `max_chars + 1` characters long in that case.
///
/// Counting is done in `char`s, so the cut never lands inside a multi-byte
/// UTF-8 sequence.
fn truncate_summary(input: &str, max_chars: usize) -> String {
    // The character at index `max_chars` exists only when the input is too
    // long; its byte offset is exactly where the kept prefix ends.
    match input.char_indices().nth(max_chars) {
        None => input.to_string(),
        Some((cut, _)) => format!("{}\u{2026}", &input[..cut]),
    }
}
pub async fn serve_file( pub async fn serve_file(
State(state): State<HtmlState>, State(state): State<HtmlState>,
RequireUser(user): RequireUser, RequireUser(user): RequireUser,

View File

@@ -5,7 +5,9 @@ use axum::{
routing::{delete, get}, routing::{delete, get},
Router, Router,
}; };
use handlers::{delete_job, delete_text_content, index_handler, serve_file, show_active_jobs}; use handlers::{
delete_job, delete_text_content, index_handler, serve_file, show_active_jobs, show_task_archive,
};
use crate::html_state::HtmlState; use crate::html_state::HtmlState;
@@ -24,6 +26,7 @@ where
{ {
Router::new() Router::new()
.route("/jobs/{job_id}", delete(delete_job)) .route("/jobs/{job_id}", delete(delete_job))
.route("/jobs/archive", get(show_task_archive))
.route("/active-jobs", get(show_active_jobs)) .route("/active-jobs", get(show_active_jobs))
.route("/text-content/{id}", delete(delete_text_content)) .route("/text-content/{id}", delete(delete_text_content))
.route("/file/{id}", get(serve_file)) .route("/file/{id}", get(serve_file))

View File

@@ -2,10 +2,16 @@
<section id="active_jobs_section" class="nb-panel p-4 space-y-4 mt-6 sm:mt-8"> <section id="active_jobs_section" class="nb-panel p-4 space-y-4 mt-6 sm:mt-8">
<header class="flex flex-wrap items-center justify-between gap-3"> <header class="flex flex-wrap items-center justify-between gap-3">
<h2 class="text-xl font-extrabold tracking-tight">Active Tasks</h2> <h2 class="text-xl font-extrabold tracking-tight">Active Tasks</h2>
<button class="nb-btn btn-square btn-sm" hx-get="/active-jobs" hx-target="#active_jobs_section" hx-swap="outerHTML" <div class="flex gap-2">
aria-label="Refresh active tasks"> <button class="nb-btn btn-square btn-sm" hx-get="/active-jobs" hx-target="#active_jobs_section" hx-swap="outerHTML"
{% include "icons/refresh_icon.html" %} aria-label="Refresh active tasks">
</button> {% include "icons/refresh_icon.html" %}
</button>
<button class="nb-btn btn-sm" hx-get="/jobs/archive" hx-target="#modal" hx-swap="innerHTML"
aria-label="View task archive">
View Archive
</button>
</div>
</header> </header>
{% if active_jobs %} {% if active_jobs %}
<ul class="flex flex-col gap-3 list-none p-0 m-0"> <ul class="flex flex-col gap-3 list-none p-0 m-0">

View File

@@ -0,0 +1,152 @@
{% extends "modal_base.html" %}
{% block modal_class %}w-11/12 max-w-[90ch] max-h-[95%] overflow-y-auto{% endblock %}
{% block form_attributes %}onsubmit="event.preventDefault();"{% endblock %}
{% block modal_content %}
<h3 class="text-xl font-extrabold tracking-tight flex items-center gap-2">
Ingestion Task Archive
<span class="badge badge-neutral text-xs font-normal">{{ tasks|length }} total</span>
</h3>
<p class="text-sm opacity-70">A history of all ingestion tasks for {{ user.email }}.</p>
{% if tasks %}
<div class="hidden lg:block overflow-x-auto nb-card mt-4">
<table class="nb-table">
<thead>
<tr>
<th class="text-left">Content</th>
<th class="text-left">State</th>
<th class="text-left">Attempts</th>
<th class="text-left">Scheduled</th>
<th class="text-left">Updated</th>
<th class="text-left">Worker</th>
<th class="text-left">Error</th>
</tr>
</thead>
<tbody>
{% for task in tasks %}
<tr>
<td>
<div class="flex flex-col gap-1">
<div class="text-sm font-semibold">{{ task.content_kind }}</div>
<div class="text-xs opacity-70 break-words">{{ task.content_summary }}</div>
<div class="text-[11px] opacity-60 lowercase tracking-wider">{{ task.id }}</div>
</div>
</td>
<td>
<span class="badge badge-primary badge-outline tracking-wide">{{ task.state_label }}</span>
</td>
<td>
<div class="text-sm font-semibold">{{ task.attempts }} / {{ task.max_attempts }}</div>
<div class="text-xs opacity-60">Priority {{ task.priority }}</div>
</td>
<td>
<div class="text-sm">
{{ task.scheduled_at|datetimeformat(format="short", tz=user.timezone) }}
</div>
{% if task.locked_at %}
<div class="text-xs opacity-60">Locked {{ task.locked_at|datetimeformat(format="short", tz=user.timezone) }}
</div>
{% endif %}
</td>
<td>
<div class="text-sm">
{{ task.updated_at|datetimeformat(format="short", tz=user.timezone) }}
</div>
<div class="text-xs opacity-60">Created {{ task.created_at|datetimeformat(format="short", tz=user.timezone) }}
</div>
</td>
<td>
{% if task.worker_id %}
<span class="text-sm font-semibold">{{ task.worker_id }}</span>
<div class="text-xs opacity-60">Lease {{ task.lease_duration_secs }}s</div>
{% else %}
<span class="text-xs opacity-60">Not assigned</span>
{% endif %}
</td>
<td>
  {% if task.error_message %}
  <div class="text-sm text-error font-semibold">{{ task.error_message }}</div>
  {% if task.last_error_at %}
  <div class="text-xs opacity-60">{{ task.last_error_at|datetimeformat(format="short", tz=user.timezone) }}
  </div>
  {% endif %}
  {% else %}
  {# Placeholder so the cell is not rendered blank when the task has no recorded error. #}
  <span class="text-xs opacity-60">&mdash;</span>
  {% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="lg:hidden flex flex-col gap-3 mt-4">
{% for task in tasks %}
<details class="nb-panel p-3 space-y-3">
<summary class="flex items-center justify-between gap-2 text-sm font-semibold cursor-pointer">
<span>{{ task.content_kind }}</span>
<span class="badge badge-primary badge-outline tracking-wide">{{ task.state_label }}</span>
</summary>
<div class="text-xs opacity-70 break-words">{{ task.content_summary }}</div>
<div class="text-[11px] opacity-60 lowercase tracking-wider">{{ task.id }}</div>
<div class="grid grid-cols-1 gap-2 text-xs">
<div class="flex justify-between">
<span class="opacity-60 uppercase tracking-wide">Attempts</span>
<span class="text-sm font-semibold">{{ task.attempts }} / {{ task.max_attempts }}</span>
</div>
<div class="flex justify-between">
<span class="opacity-60 uppercase tracking-wide">Priority</span>
<span class="text-sm font-semibold">{{ task.priority }}</span>
</div>
<div class="flex justify-between">
<span class="opacity-60 uppercase tracking-wide">Scheduled</span>
<span>{{ task.scheduled_at|datetimeformat(format="short", tz=user.timezone) }}</span>
</div>
<div class="flex justify-between">
<span class="opacity-60 uppercase tracking-wide">Updated</span>
<span>{{ task.updated_at|datetimeformat(format="short", tz=user.timezone) }}</span>
</div>
<div class="flex justify-between">
<span class="opacity-60 uppercase tracking-wide">Created</span>
<span>{{ task.created_at|datetimeformat(format="short", tz=user.timezone) }}</span>
</div>
<div class="flex justify-between">
<span class="opacity-60 uppercase tracking-wide">Worker</span>
{% if task.worker_id %}
<span class="text-sm font-semibold">{{ task.worker_id }}</span>
{% else %}
<span class="opacity-60">Unassigned</span>
{% endif %}
</div>
<div class="flex justify-between">
<span class="opacity-60 uppercase tracking-wide">Lease</span>
<span>{{ task.lease_duration_secs }}s</span>
</div>
{% if task.locked_at %}
<div class="flex justify-between">
<span class="opacity-60 uppercase tracking-wide">Locked</span>
<span>{{ task.locked_at|datetimeformat(format="short", tz=user.timezone) }}</span>
</div>
{% endif %}
</div>
{% if task.error_message or task.last_error_at %}
<div class="border-t border-base-200 pt-2 text-xs space-y-1">
{% if task.error_message %}
<div class="text-sm text-error font-semibold">{{ task.error_message }}</div>
{% endif %}
{% if task.last_error_at %}
<div class="opacity-60">Last error {{ task.last_error_at|datetimeformat(format="short", tz=user.timezone) }}</div>
{% endif %}
</div>
{% endif %}
</details>
{% endfor %}
</div>
{% else %}
<p class="text-sm opacity-70 mt-4">No tasks yet. Start an ingestion to populate the archive.</p>
{% endif %}
{% endblock %}
{% block primary_actions %}{% endblock %}