feat: readability parsing, screenshot of page, file serving

This commit is contained in:
Per Stark
2025-04-30 08:06:18 +02:00
parent 776a454a88
commit 02198dc21a
20 changed files with 707 additions and 309 deletions

View File

@@ -31,6 +31,7 @@ tower-http = { version = "0.6.2", features = ["fs"] }
chrono-tz = "0.10.1"
tower-serve-static = "0.1.1"
include_dir = "0.7.4"
tokio-util = { version = "0.7.15", features = ["io"] }
common = { path = "../common" }
composite-retrieval = { path = "../composite-retrieval" }

View File

@@ -1,9 +1,12 @@
use axum::{
body::Body,
extract::{Path, State},
http::{header, HeaderMap, HeaderValue, StatusCode},
response::IntoResponse,
};
use serde::Serialize;
use tokio::join;
use tokio::{fs::File, join};
use tokio_util::io::ReaderStream;
use crate::{
middlewares::{
@@ -15,9 +18,15 @@ use crate::{
use common::{
error::AppError,
storage::types::{
conversation::Conversation, file_info::FileInfo, ingestion_task::IngestionTask,
knowledge_entity::KnowledgeEntity, knowledge_relationship::KnowledgeRelationship,
text_chunk::TextChunk, text_content::TextContent, user::User,
conversation::Conversation,
file_info::{FileError, FileInfo},
ingestion_task::IngestionTask,
knowledge_entity::KnowledgeEntity,
knowledge_relationship::KnowledgeRelationship,
text_chunk::TextChunk,
text_content::TextContent,
user::User,
StoredObject,
},
};
@@ -167,3 +176,49 @@ pub async fn show_active_jobs(
},
))
}
/// Streams a stored file back to the authenticated user that owns it.
///
/// The handler looks up the [`FileInfo`] record for `file_id`, compares its `user_id`
/// with the requesting user's id, opens the file at the recorded `path` and streams it
/// as the response body.
///
/// Responses:
/// * `TemplateResponse::not_found()` when the `FileInfo` lookup fails.
/// * `TemplateResponse::unauthorized()` when the record belongs to a different user.
/// * `TemplateResponse::server_error()` when the file cannot be opened.
/// * `200 OK` with `Content-Type` and `Content-Disposition` headers otherwise.
pub async fn serve_file(
    State(state): State<HtmlState>,
    RequireUser(user): RequireUser,
    Path(file_id): Path<String>,
) -> Result<impl IntoResponse, HtmlError> {
    let Ok(file_info) = FileInfo::get_by_id(&file_id, &state.db).await else {
        return Ok(TemplateResponse::not_found().into_response());
    };

    // Only the owner of the record may download the file.
    if file_info.user_id != user.id {
        return Ok(TemplateResponse::unauthorized().into_response());
    }

    // Open the file asynchronously from the stored path.
    let path = std::path::Path::new(&file_info.path);
    let Ok(file) = File::open(path).await else {
        return Ok(TemplateResponse::server_error().into_response());
    };

    // Stream the contents instead of buffering the whole file in memory.
    let body = Body::from_stream(ReaderStream::new(file));

    // Fall back to a generic binary type when the stored MIME type is not a valid header value.
    let content_type = HeaderValue::from_str(&file_info.mime_type)
        .unwrap_or_else(|_| HeaderValue::from_static("application/octet-stream"));

    // The file name is placed inside a quoted-string, so backslashes and double quotes
    // must be escaped; otherwise such a name produces a malformed header.
    let escaped_name = file_info
        .file_name
        .replace('\\', "\\\\")
        .replace('"', "\\\"");
    // Names that still cannot be represented as a header value (e.g. control or
    // non-ASCII characters) degrade to a bare `attachment` disposition.
    let disposition = HeaderValue::from_str(&format!("attachment; filename=\"{escaped_name}\""))
        .unwrap_or_else(|_| HeaderValue::from_static("attachment"));

    let mut headers = HeaderMap::new();
    headers.insert(header::CONTENT_TYPE, content_type);
    headers.insert(header::CONTENT_DISPOSITION, disposition);

    Ok((StatusCode::OK, headers, body).into_response())
}

View File

@@ -5,7 +5,7 @@ use axum::{
routing::{delete, get},
Router,
};
use handlers::{delete_job, delete_text_content, index_handler, show_active_jobs};
use handlers::{delete_job, delete_text_content, index_handler, serve_file, show_active_jobs};
use crate::html_state::HtmlState;
@@ -26,4 +26,5 @@ where
.route("/jobs/{job_id}", delete(delete_job))
.route("/active-jobs", get(show_active_jobs))
.route("/text-content/{id}", delete(delete_text_content))
.route("/file/{id}", get(serve_file))
}

View File

@@ -54,9 +54,9 @@
<select name="query_model" class="select select-bordered w-full">
<option value="gpt-4o-mini" {% if settings.query_model=="gpt-4o-mini" %}selected{% endif %}>GPT-4o Mini
</option>
<option value="gpt-4o" {% if settings.query_model=="gpt-4o" %}selected{% endif %}>GPT-4o</option>
<option value="gpt-3.5-turbo" {% if settings.query_model=="gpt-3.5-turbo" %}selected{% endif %}>GPT-3.5
Turbo</option>
<option value="gpt-4.1" {% if settings.query_model=="gpt-4.1" %}selected{% endif %}>GPT-4.1</option>
<option value="gpt-4.1-mini" {% if settings.query_model=="gpt-4.1-mini" %}selected{% endif %}>GPT-4.1-mini
</option>
</select>
<p class="text-xs text-gray-500 mt-1">Model used for answering user queries</p>
</div>
@@ -66,11 +66,11 @@
<span class="label-text">Processing Model</span>
</label>
<select name="processing_model" class="select select-bordered w-full">
<option value="gpt-4o-mini" {% if settings.processing_model=="gpt-4o-mini" %}selected{% endif %}>GPT-4o Mini
<option value="gpt-4o-mini" {% if settings.processing_model=="gpt-4o-mini" %}selected{% endif %}>GPT-4o Mini
</option>
<option value="gpt-4.1" {% if settings.processing_model=="gpt-4.1" %}selected{% endif %}>GPT-4.1</option>
<option value="gpt-4.1-mini" {% if settings.processing_model=="gpt-4.1-mini" %}selected{% endif %}>GPT-4.1-mini
</option>
<option value="gpt-4o" {% if settings.processing_model=="gpt-4o" %}selected{% endif %}>GPT-4o</option>
<option value="gpt-3.5-turbo" {% if settings.processing_model=="gpt-3.5-turbo" %}selected{% endif %}>GPT-3.5
Turbo</option>
</select>
<p class="text-xs text-gray-500 mt-1">Model used for content processing and ingestion</p>
</div>

View File

@@ -1,6 +1,7 @@
<div class="grid sm:grid-cols-2 lg:grid-cols-3 gap-4" id="text_content_cards">
{% for text_content in text_contents %}
<div class="card min-w-72 bg-base-100 shadow">
{% if text_content.url_info %}
<img class="rounded-t-md overflow-clip" src="/file/{{text_content.url_info.image_id}}" alt="" />
{% endif %}
<div class="card-body">
<div class="flex justify-between space-x-2">
<h2 class="card-title truncate">

View File

@@ -4,7 +4,7 @@
{% for item in latest_text_contents %}
<li class="list-row">
<div class="bg-accent rounded-box size-10 flex justify-center items-center text-accent-content">
{% if item.url %}
{% if item.url_info %}
{% include "icons/globe_icon.html" %}
{% elif item.file_info %}
{% include "icons/document_icon.html" %}
@@ -14,8 +14,8 @@
</div>
<div>
<div class="truncate max-w-[160px]">
{% if item.url %}
{{item.url}}
{% if item.url_info %}
{{item.url_info.title}}
{% elif item.file_info%}
{{item.file_info.file_name}}
{% else %}

View File

@@ -31,6 +31,10 @@
</a>
</li>
{% endfor %}
<li>
<button class="btn btn-primary" hx-get="/ingress-form" hx-target="#modal" hx-swap="innerHTML">Add
Content</button>
</li>
<div class="divider "></div>
</div>