feat: completed image ingestion with system settings

styling
2026-05-11 10:20:13 +02:00 · 2025-06-27 21:40:44 +02:00
parent f1548d18db
commit 43263fa77e
11 changed files with 157 additions and 13 deletions
--- a/common/migrations/20250627_120000_add_image_processing_settings.surql
+++ b/common/migrations/20250627_120000_add_image_processing_settings.surql
@@ -0,0 +1,7 @@
+DEFINE FIELD IF NOT EXISTS image_processing_model ON system_settings TYPE string;
+DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;
+
+UPDATE system_settings:current SET
+	image_processing_model =  "gpt-4o-mini",
+    image_processing_prompt = "Analyze this image and respond based on its primary content:\n - If the image is mainly text (document, screenshot, sign), transcribe the text verbatim.\n - If the image is mainly visual (photograph, art, landscape), provide a concise description of the scene.\n - For hybrid images (diagrams, ads), briefly describe the visual, then transcribe the text under a Text: heading.\n\n Respond directly with the analysis."
+WHERE image_processing_model == NONE && image_processing_prompt == NONE;
--- a/common/migrations/definitions/20250627_120000_add_image_processing_settings.json
+++ b/common/migrations/definitions/20250627_120000_add_image_processing_settings.json
@@ -0,0 +1 @@
+{"schemas":"--- original\n+++ modified\n@@ -157,10 +157,12 @@\n DEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;\n DEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS image_processing_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS embedding_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;\n DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;\n\n # Defines the schema for the 'text_chunk' table.\n\n","events":null}
--- a/common/schemas/system_settings.surql
+++ b/common/schemas/system_settings.surql
@@ -7,7 +7,9 @@ DEFINE FIELD IF NOT EXISTS registrations_enabled ON system_settings TYPE bool;
 DEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;
 DEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;
 DEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;
+DEFINE FIELD IF NOT EXISTS image_processing_model ON system_settings TYPE string;
 DEFINE FIELD IF NOT EXISTS embedding_model ON system_settings TYPE string;
 DEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;
 DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;
 DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;
+DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;
--- a/common/src/storage/types/system_prompts.rs
+++ b/common/src/storage/types/system_prompts.rs
@@ -54,3 +54,10 @@ Guidelines:
 7. Only create relationships between existing KnowledgeEntities.
 8. Entities that exist already in the database should NOT be created again. If there is only a minor overlap, skip creating a new entity.
 9. A new relationship MUST include a newly created KnowledgeEntity."#;
+
+pub static DEFAULT_IMAGE_PROCESSING_PROMPT: &str = r#"Analyze this image and respond based on its primary content:
+- If the image is mainly text (document, screenshot, sign), transcribe the text verbatim.
+- If the image is mainly visual (photograph, art, landscape), provide a concise description of the scene.
+- For hybrid images (diagrams, ads), briefly describe the visual, then transcribe the text under a "Text:" heading.
+
+Respond directly with the analysis."#;
--- a/common/src/storage/types/system_settings.rs
+++ b/common/src/storage/types/system_settings.rs
@@ -15,6 +15,8 @@ pub struct SystemSettings {
    pub embedding_dimensions: u32,
    pub query_system_prompt: String,
    pub ingestion_system_prompt: String,
+    pub image_processing_model: String,
+    pub image_processing_prompt: String,
 }

 impl StoredObject for SystemSettings {
@@ -78,6 +80,7 @@ mod tests {
        assert_eq!(settings.require_email_verification, false);
        assert_eq!(settings.query_model, "gpt-4o-mini");
        assert_eq!(settings.processing_model, "gpt-4o-mini");
+        assert_eq!(settings.image_processing_model, "gpt-4o-mini");
        // Dont test these for now, having a hard time getting the formatting exactly the same
        // assert_eq!(
        //     settings.query_system_prompt,
--- a/html-router/assets/style.css
+++ b/html-router/assets/style.css
--- a/html-router/src/routes/admin/handlers.rs
+++ b/html-router/src/routes/admin/handlers.rs
@@ -8,7 +8,10 @@ use common::{
        analytics::Analytics,
        conversation::Conversation,
        knowledge_entity::KnowledgeEntity,
-        system_prompts::{DEFAULT_INGRESS_ANALYSIS_SYSTEM_PROMPT, DEFAULT_QUERY_SYSTEM_PROMPT},
+        system_prompts::{
+            DEFAULT_IMAGE_PROCESSING_PROMPT, DEFAULT_INGRESS_ANALYSIS_SYSTEM_PROMPT,
+            DEFAULT_QUERY_SYSTEM_PROMPT,
+        },
        system_settings::SystemSettings,
        text_chunk::TextChunk,
        user::User,
@@ -31,6 +34,7 @@ pub struct AdminPanelData {
    analytics: Analytics,
    users: i64,
    default_query_prompt: String,
+    default_image_prompt: String,
    conversation_archive: Vec<Conversation>,
    available_models: ListModelResponse,
 }
@@ -63,6 +67,7 @@ pub async fn show_admin_panel(
                .map_err(|e| AppError::InternalError(e.to_string()))?,
            users: user_count_res?,
            default_query_prompt: DEFAULT_QUERY_SYSTEM_PROMPT.to_string(),
+            default_image_prompt: DEFAULT_IMAGE_PROCESSING_PROMPT.to_string(),
            conversation_archive: conversation_archive_res?,
        },
    ))
@@ -122,6 +127,7 @@ pub async fn toggle_registration_status(
 pub struct ModelSettingsInput {
    query_model: String,
    processing_model: String,
+    image_processing_model: String,
    embedding_model: String,
    embedding_dimensions: Option<u32>,
 }
@@ -152,6 +158,7 @@ pub async fn update_model_settings(
    let new_settings = SystemSettings {
        query_model: input.query_model,
        processing_model: input.processing_model,
+        image_processing_model: input.image_processing_model,
        embedding_model: input.embedding_model,
        // Use new dimensions if provided, otherwise retain the current ones.
        embedding_dimensions: input
@@ -339,3 +346,62 @@ pub async fn patch_ingestion_prompt(
        },
    ))
 }
+
+#[derive(Serialize)]
+pub struct ImagePromptEditData {
+    settings: SystemSettings,
+    default_image_prompt: String,
+}
+
+pub async fn show_edit_image_prompt(
+    State(state): State<HtmlState>,
+    RequireUser(user): RequireUser,
+) -> Result<impl IntoResponse, HtmlError> {
+    // Early return if the user is not an admin
+    if !user.admin {
+        return Ok(TemplateResponse::redirect("/"));
+    };
+
+    let settings = SystemSettings::get_current(&state.db).await?;
+
+    Ok(TemplateResponse::new_template(
+        "admin/edit_image_prompt_modal.html",
+        ImagePromptEditData {
+            settings,
+            default_image_prompt: DEFAULT_IMAGE_PROCESSING_PROMPT.to_string(),
+        },
+    ))
+}
+
+#[derive(Deserialize)]
+pub struct ImagePromptUpdateInput {
+    image_processing_prompt: String,
+}
+
+pub async fn patch_image_prompt(
+    State(state): State<HtmlState>,
+    RequireUser(user): RequireUser,
+    Form(input): Form<ImagePromptUpdateInput>,
+) -> Result<impl IntoResponse, HtmlError> {
+    // Early return if the user is not an admin
+    if !user.admin {
+        return Ok(TemplateResponse::redirect("/"));
+    };
+
+    let current_settings = SystemSettings::get_current(&state.db).await?;
+
+    let new_settings = SystemSettings {
+        image_processing_prompt: input.image_processing_prompt,
+        ..current_settings.clone()
+    };
+
+    SystemSettings::update(&state.db, new_settings.clone()).await?;
+
+    Ok(TemplateResponse::new_partial(
+        "admin/base.html",
+        "system_prompt_section",
+        SystemPromptSectionData {
+            settings: new_settings,
+        },
+    ))
+}
--- a/html-router/src/routes/admin/mod.rs
+++ b/html-router/src/routes/admin/mod.rs
@@ -5,8 +5,9 @@ use axum::{
    Router,
 };
 use handlers::{
-    patch_ingestion_prompt, patch_query_prompt, show_admin_panel, show_edit_ingestion_prompt,
-    show_edit_system_prompt, toggle_registration_status, update_model_settings,
+    patch_image_prompt, patch_ingestion_prompt, patch_query_prompt, show_admin_panel,
+    show_edit_image_prompt, show_edit_ingestion_prompt, show_edit_system_prompt,
+    toggle_registration_status, update_model_settings,
 };

 use crate::html_state::HtmlState;
@@ -24,4 +25,6 @@ where
        .route("/update-query-prompt", patch(patch_query_prompt))
        .route("/edit-ingestion-prompt", get(show_edit_ingestion_prompt))
        .route("/update-ingestion-prompt", patch(patch_ingestion_prompt))
+        .route("/edit-image-prompt", get(show_edit_image_prompt))
+        .route("/update-image-prompt", patch(patch_image_prompt))
 }
--- a/html-router/templates/admin/base.html
+++ b/html-router/templates/admin/base.html
@@ -6,7 +6,7 @@
 <main class="container flex-grow flex flex-col mx-auto mt-4 space-y-6">
  <h1 class="text-2xl font-bold mb-2">Admin Dashboard</h1>

-  <div class="stats stats-vertical lg:stats-horizontal shadow">
+  <div class="stats stats-vertical md:stats-horizontal shadow">
    <div class="stat">
      <div class="stat-title font-bold">Page loads</div>
      <div class="stat-value text-secondary">{{analytics.page_loads}}</div>
@@ -27,7 +27,7 @@
  </div>

  <!-- Settings in Fieldset -->
-  <div class="grid grid-cols-1 sm:grid-cols-2 gap-6">
+  <div class="grid grid-cols-1 xl:grid-cols-2 gap-6">
    {% block system_prompt_section %}
    <div id="system_prompt_section">
      <fieldset class="fieldset p-4 shadow rounded-box">
@@ -41,6 +41,10 @@
            hx-swap="innerHTML">
            Edit Ingestion Prompt
          </button>
+          <button type="button" class="btn btn-primary btn-sm" hx-get="/edit-image-prompt" hx-target="#modal"
+            hx-swap="innerHTML">
+            Edit Image Prompt
+          </button>
        </div>
      </fieldset>
    </div>
@@ -85,6 +89,24 @@
          </p>
        </div>

+        <!-- Image Processing Model -->
+        <div class="form-control mb-4">
+          <label class="label">
+            <span class="label-text">Image Processing Model</span>
+          </label>
+          <select name="image_processing_model" class="select select-bordered w-full">
+            {% for model in available_models.data %}
+            <option value="{{model.id}}" {% if settings.image_processing_model==model.id %} selected {% endif %}>
+              {{model.id}}
+            </option>
+            {% endfor %}
+          </select>
+          <p class="text-xs text-gray-500 mt-1">
+            Current used:
+            <span class="font-mono">{{settings.image_processing_model}}</span>
+          </p>
+        </div>
+
        <!-- Embedding Model -->
        <div class="form-control mb-4">
          <label class="label">
--- a/html-router/templates/admin/edit_image_prompt_modal.html
+++ b/html-router/templates/admin/edit_image_prompt_modal.html
@@ -0,0 +1,38 @@
+{% extends "modal_base.html" %}
+
+{% block form_attributes %}
+hx-patch="/update-image-prompt"
+hx-target="#system_prompt_section"
+hx-swap="outerHTML"
+{% endblock %}
+
+{% block modal_content %}
+<h3 class="text-lg font-bold mb-4">Edit Image Processing Prompt</h3>
+
+<div class="form-control">
+  <textarea name="image_processing_prompt" class="textarea textarea-bordered h-96 w-full font-mono text-sm">{{
+    settings.image_processing_prompt }}</textarea>
+  <p class="text-xs text-gray-500 mt-1">System prompt used for processing images</p>
+</div>
+{% endblock %}
+
+{% block primary_actions %}
+<button type="button" class="btn btn-outline mr-2" id="reset_prompt_button">
+  Reset to Default
+</button>
+
+<textarea id="default_prompt_content" style="display:none;">{{ default_image_prompt }}</textarea>
+<script>
+  document.getElementById('reset_prompt_button').addEventListener('click', function () {
+    const defaultContent = document.getElementById('default_prompt_content').value;
+    document.querySelector('textarea[name=image_processing_prompt]').value = defaultContent;
+  });
+</script>
+
+<button type="submit" class="btn btn-primary">
+  <span class="htmx-indicator hidden">
+    <span class="loading loading-spinner loading-xs mr-2"></span>
+  </span>
+  Save Changes
+</button>
+{% endblock %}
--- a/ingestion-pipeline/src/utils/image_parsing.rs
+++ b/ingestion-pipeline/src/utils/image_parsing.rs
@@ -22,17 +22,12 @@ pub async fn extract_text_from_image(
    let image_url = format!("data:image/png;base64,{}", base64_image);

    let request = CreateChatCompletionRequestArgs::default()
-        .model(system_settings.processing_model)
+        .model(system_settings.image_processing_model)
        .max_tokens(6400_u32)
        .messages([ChatCompletionRequestUserMessageArgs::default()
            .content(vec![
                ChatCompletionRequestMessageContentPartTextArgs::default()
-                    .text(r#"Analyze this image and respond based on its primary content:
-                            - If the image is mainly text (document, screenshot, sign), transcribe the text verbatim.
-                            - If the image is mainly visual (photograph, art, landscape), provide a concise description of the scene.
-                            - For hybrid images (diagrams, ads), briefly describe the visual, then transcribe the text under a "Text:" heading.
-                            
-                            Respond directly with the analysis."#)
+                    .text(system_settings.image_processing_prompt)
                    .build()?
                    .into(),
                ChatCompletionRequestMessageContentPartImageArgs::default()
				`@@ -0,0 +1 @@`
				{"schemas":"--- original\n+++ modified\n@@ -157,10 +157,12 @@\n DEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;\n DEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS image_processing_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS embedding_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;\n DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;\n\n # Defines the schema for the 'text_chunk' table.\n\n","events":null}