feat: completed image ingestion with system settings

styling
This commit is contained in:
Per Stark
2025-06-27 21:40:44 +02:00
parent f1548d18db
commit 43263fa77e
11 changed files with 157 additions and 13 deletions

View File

@@ -0,0 +1,7 @@
# Adds the image processing settings (model and prompt) to `system_settings`.
DEFINE FIELD IF NOT EXISTS image_processing_model ON system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;
# Seeds default values on the `system_settings:current` record. The WHERE guard
# only matches while BOTH fields are still NONE, so a record that already has
# either value set is left untouched.
UPDATE system_settings:current SET
image_processing_model = "gpt-4o-mini",
image_processing_prompt = "Analyze this image and respond based on its primary content:\n - If the image is mainly text (document, screenshot, sign), transcribe the text verbatim.\n - If the image is mainly visual (photograph, art, landscape), provide a concise description of the scene.\n - For hybrid images (diagrams, ads), briefly describe the visual, then transcribe the text under a Text: heading.\n\n Respond directly with the analysis."
WHERE image_processing_model == NONE && image_processing_prompt == NONE;

View File

@@ -0,0 +1 @@
{"schemas":"--- original\n+++ modified\n@@ -157,10 +157,12 @@\n DEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;\n DEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS image_processing_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS embedding_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;\n DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;\n\n # Defines the schema for the 'text_chunk' table.\n\n","events":null}

View File

@@ -7,7 +7,9 @@ DEFINE FIELD IF NOT EXISTS registrations_enabled ON system_settings TYPE bool;
DEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;
DEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS image_processing_model ON system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS embedding_model ON system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;
DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;

View File

@@ -54,3 +54,10 @@ Guidelines:
7. Only create relationships between existing KnowledgeEntities.
8. Entities that exist already in the database should NOT be created again. If there is only a minor overlap, skip creating a new entity.
9. A new relationship MUST include a newly created KnowledgeEntity."#;
/// Built-in default prompt for image processing.
///
/// Instructs the model to transcribe text-heavy images verbatim, describe
/// mainly visual images concisely, and do both for hybrid images (with the
/// transcription placed under a "Text:" heading).
pub static DEFAULT_IMAGE_PROCESSING_PROMPT: &str = r#"Analyze this image and respond based on its primary content:
- If the image is mainly text (document, screenshot, sign), transcribe the text verbatim.
- If the image is mainly visual (photograph, art, landscape), provide a concise description of the scene.
- For hybrid images (diagrams, ads), briefly describe the visual, then transcribe the text under a "Text:" heading.
Respond directly with the analysis."#;

View File

@@ -15,6 +15,8 @@ pub struct SystemSettings {
pub embedding_dimensions: u32,
pub query_system_prompt: String,
pub ingestion_system_prompt: String,
pub image_processing_model: String,
pub image_processing_prompt: String,
}
impl StoredObject for SystemSettings {
@@ -78,6 +80,7 @@ mod tests {
assert_eq!(settings.require_email_verification, false);
assert_eq!(settings.query_model, "gpt-4o-mini");
assert_eq!(settings.processing_model, "gpt-4o-mini");
assert_eq!(settings.image_processing_model, "gpt-4o-mini");
// Don't test these for now; it is hard to get the formatting to match exactly
// assert_eq!(
// settings.query_system_prompt,

File diff suppressed because one or more lines are too long

View File

@@ -8,7 +8,10 @@ use common::{
analytics::Analytics,
conversation::Conversation,
knowledge_entity::KnowledgeEntity,
system_prompts::{DEFAULT_INGRESS_ANALYSIS_SYSTEM_PROMPT, DEFAULT_QUERY_SYSTEM_PROMPT},
system_prompts::{
DEFAULT_IMAGE_PROCESSING_PROMPT, DEFAULT_INGRESS_ANALYSIS_SYSTEM_PROMPT,
DEFAULT_QUERY_SYSTEM_PROMPT,
},
system_settings::SystemSettings,
text_chunk::TextChunk,
user::User,
@@ -31,6 +34,7 @@ pub struct AdminPanelData {
analytics: Analytics,
users: i64,
default_query_prompt: String,
default_image_prompt: String,
conversation_archive: Vec<Conversation>,
available_models: ListModelResponse,
}
@@ -63,6 +67,7 @@ pub async fn show_admin_panel(
.map_err(|e| AppError::InternalError(e.to_string()))?,
users: user_count_res?,
default_query_prompt: DEFAULT_QUERY_SYSTEM_PROMPT.to_string(),
default_image_prompt: DEFAULT_IMAGE_PROCESSING_PROMPT.to_string(),
conversation_archive: conversation_archive_res?,
},
))
@@ -122,6 +127,7 @@ pub async fn toggle_registration_status(
/// Input consumed by `update_model_settings` to build the new `SystemSettings`.
pub struct ModelSettingsInput {
    /// Value stored as `SystemSettings::query_model`.
    query_model: String,
    /// Value stored as `SystemSettings::processing_model`.
    processing_model: String,
    /// Value stored as `SystemSettings::image_processing_model`.
    image_processing_model: String,
    /// Value stored as `SystemSettings::embedding_model`.
    embedding_model: String,
    /// New embedding dimensions; when absent, the current dimensions are retained.
    embedding_dimensions: Option<u32>,
}
@@ -152,6 +158,7 @@ pub async fn update_model_settings(
let new_settings = SystemSettings {
query_model: input.query_model,
processing_model: input.processing_model,
image_processing_model: input.image_processing_model,
embedding_model: input.embedding_model,
// Use new dimensions if provided, otherwise retain the current ones.
embedding_dimensions: input
@@ -339,3 +346,62 @@ pub async fn patch_ingestion_prompt(
},
))
}
/// Template context for the image prompt edit modal
/// (`admin/edit_image_prompt_modal.html`).
#[derive(Serialize)]
pub struct ImagePromptEditData {
    /// Current system settings; the modal reads `image_processing_prompt` from it.
    settings: SystemSettings,
    /// Built-in default prompt, offered by the modal's "Reset to Default" button.
    default_image_prompt: String,
}
/// Renders the modal used to edit the image processing prompt.
///
/// Users without the admin flag are redirected to `/`. Otherwise the current
/// system settings are loaded and handed to the modal template together with
/// the built-in default prompt.
///
/// # Errors
///
/// Propagates any failure from loading the current system settings.
pub async fn show_edit_image_prompt(
    State(state): State<HtmlState>,
    RequireUser(user): RequireUser,
) -> Result<impl IntoResponse, HtmlError> {
    // Guard clause: only administrators may open the prompt editor.
    if !user.admin {
        return Ok(TemplateResponse::redirect("/"));
    }

    let modal_context = ImagePromptEditData {
        settings: SystemSettings::get_current(&state.db).await?,
        default_image_prompt: DEFAULT_IMAGE_PROCESSING_PROMPT.to_string(),
    };

    Ok(TemplateResponse::new_template(
        "admin/edit_image_prompt_modal.html",
        modal_context,
    ))
}
/// Form payload accepted by `patch_image_prompt`.
#[derive(Deserialize)]
pub struct ImagePromptUpdateInput {
    /// New prompt text to store as `SystemSettings::image_processing_prompt`.
    image_processing_prompt: String,
}
/// Stores a new image processing prompt and re-renders the system prompt section.
///
/// Users without the admin flag are redirected to `/` and nothing is changed.
/// Otherwise the current settings are loaded, the prompt is replaced with the
/// submitted one, the result is persisted, and the `system_prompt_section`
/// block of `admin/base.html` is rendered with the updated settings.
///
/// # Errors
///
/// Propagates any failure from loading or updating the system settings.
pub async fn patch_image_prompt(
    State(state): State<HtmlState>,
    RequireUser(user): RequireUser,
    Form(input): Form<ImagePromptUpdateInput>,
) -> Result<impl IntoResponse, HtmlError> {
    // Early return if the user is not admin
    if !user.admin {
        return Ok(TemplateResponse::redirect("/"));
    }

    let current_settings = SystemSettings::get_current(&state.db).await?;

    // Only the prompt changes. The remaining fields are moved out of the
    // freshly loaded settings, which are not used again, so no clone is needed.
    let new_settings = SystemSettings {
        image_processing_prompt: input.image_processing_prompt,
        ..current_settings
    };

    // A clone goes to `update` because `new_settings` is still needed for the
    // response rendered below.
    SystemSettings::update(&state.db, new_settings.clone()).await?;

    Ok(TemplateResponse::new_partial(
        "admin/base.html",
        "system_prompt_section",
        SystemPromptSectionData {
            settings: new_settings,
        },
    ))
}

View File

@@ -5,8 +5,9 @@ use axum::{
Router,
};
use handlers::{
patch_ingestion_prompt, patch_query_prompt, show_admin_panel, show_edit_ingestion_prompt,
show_edit_system_prompt, toggle_registration_status, update_model_settings,
patch_image_prompt, patch_ingestion_prompt, patch_query_prompt, show_admin_panel,
show_edit_image_prompt, show_edit_ingestion_prompt, show_edit_system_prompt,
toggle_registration_status, update_model_settings,
};
use crate::html_state::HtmlState;
@@ -24,4 +25,6 @@ where
.route("/update-query-prompt", patch(patch_query_prompt))
.route("/edit-ingestion-prompt", get(show_edit_ingestion_prompt))
.route("/update-ingestion-prompt", patch(patch_ingestion_prompt))
.route("/edit-image-prompt", get(show_edit_image_prompt))
.route("/update-image-prompt", patch(patch_image_prompt))
}

View File

@@ -6,7 +6,7 @@
<main class="container flex-grow flex flex-col mx-auto mt-4 space-y-6">
<h1 class="text-2xl font-bold mb-2">Admin Dashboard</h1>
<div class="stats stats-vertical lg:stats-horizontal shadow">
<div class="stats stats-vertical md:stats-horizontal shadow">
<div class="stat">
<div class="stat-title font-bold">Page loads</div>
<div class="stat-value text-secondary">{{analytics.page_loads}}</div>
@@ -27,7 +27,7 @@
</div>
<!-- Settings in Fieldset -->
<div class="grid grid-cols-1 sm:grid-cols-2 gap-6">
<div class="grid grid-cols-1 xl:grid-cols-2 gap-6">
{% block system_prompt_section %}
<div id="system_prompt_section">
<fieldset class="fieldset p-4 shadow rounded-box">
@@ -41,6 +41,10 @@
hx-swap="innerHTML">
Edit Ingestion Prompt
</button>
<button type="button" class="btn btn-primary btn-sm" hx-get="/edit-image-prompt" hx-target="#modal"
hx-swap="innerHTML">
Edit Image Prompt
</button>
</div>
</fieldset>
</div>
@@ -85,6 +89,24 @@
</p>
</div>
<!-- Image Processing Model -->
<div class="form-control mb-4">
<label class="label">
<span class="label-text">Image Processing Model</span>
</label>
<select name="image_processing_model" class="select select-bordered w-full">
{% for model in available_models.data %}
<option value="{{model.id}}" {% if settings.image_processing_model==model.id %} selected {% endif %}>
{{model.id}}
</option>
{% endfor %}
</select>
<p class="text-xs text-gray-500 mt-1">
Current used:
<span class="font-mono">{{settings.image_processing_model}}</span>
</p>
</div>
<!-- Embedding Model -->
<div class="form-control mb-4">
<label class="label">

View File

@@ -0,0 +1,38 @@
{% extends "modal_base.html" %}
{# Modal for editing the image processing prompt.
   Reads `settings.image_processing_prompt` and `default_image_prompt` from the
   template context. #}
{% block form_attributes %}
hx-patch="/update-image-prompt"
hx-target="#system_prompt_section"
hx-swap="outerHTML"
{% endblock %}
{% block modal_content %}
<h3 class="text-lg font-bold mb-4">Edit Image Processing Prompt</h3>
<div class="form-control">
<textarea name="image_processing_prompt" class="textarea textarea-bordered h-96 w-full font-mono text-sm">{{
settings.image_processing_prompt }}</textarea>
<p class="text-xs text-gray-500 mt-1">System prompt used for processing images</p>
</div>
{% endblock %}
{% block primary_actions %}
<button type="button" class="btn btn-outline mr-2" id="reset_prompt_button">
Reset to Default
</button>
{# Hidden textarea that carries the default prompt; the click handler below
   copies its value into the editable prompt field. #}
<textarea id="default_prompt_content" style="display:none;">{{ default_image_prompt }}</textarea>
<script>
document.getElementById('reset_prompt_button').addEventListener('click', function () {
const defaultContent = document.getElementById('default_prompt_content').value;
document.querySelector('textarea[name=image_processing_prompt]').value = defaultContent;
});
</script>
<button type="submit" class="btn btn-primary">
<span class="htmx-indicator hidden">
<span class="loading loading-spinner loading-xs mr-2"></span>
</span>
Save Changes
</button>
{% endblock %}

View File

@@ -22,17 +22,12 @@ pub async fn extract_text_from_image(
let image_url = format!("data:image/png;base64,{}", base64_image);
let request = CreateChatCompletionRequestArgs::default()
.model(system_settings.processing_model)
.model(system_settings.image_processing_model)
.max_tokens(6400_u32)
.messages([ChatCompletionRequestUserMessageArgs::default()
.content(vec![
ChatCompletionRequestMessageContentPartTextArgs::default()
.text(r#"Analyze this image and respond based on its primary content:
- If the image is mainly text (document, screenshot, sign), transcribe the text verbatim.
- If the image is mainly visual (photograph, art, landscape), provide a concise description of the scene.
- For hybrid images (diagrams, ads), briefly describe the visual, then transcribe the text under a "Text:" heading.
Respond directly with the analysis."#)
.text(system_settings.image_processing_prompt)
.build()?
.into(),
ChatCompletionRequestMessageContentPartImageArgs::default()