mirror of
https://github.com/perstarkse/minne.git
synced 2026-01-11 20:50:24 +01:00
feat: image ingestion
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -2819,6 +2819,7 @@ dependencies = [
|
||||
"async-openai",
|
||||
"axum",
|
||||
"axum_typed_multipart",
|
||||
"base64 0.22.1",
|
||||
"chrono",
|
||||
"common",
|
||||
"composite-retrieval",
|
||||
|
||||
@@ -53,6 +53,7 @@ tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
|
||||
url = { version = "2.5.2", features = ["serde"] }
|
||||
uuid = { version = "1.10.0", features = ["v4", "serde"] }
|
||||
tokio-retry = "0.3.0"
|
||||
base64 = "0.22.1"
|
||||
|
||||
[profile.dist]
|
||||
inherits = "release"
|
||||
|
||||
@@ -7,6 +7,11 @@
|
||||
<img src="/file/{{text_content.url_info.image_id}}" alt="website screenshot" />
|
||||
</figure>
|
||||
{% endif %}
|
||||
{% if text_content.file_info.mime_type == "image/png" or text_content.file_info.mime_type == "image/jpeg" %}
|
||||
<figure>
|
||||
<img src="/file/{{text_content.file_info.id}}" alt="{{text_content.file_info.file_name}}" />
|
||||
</figure>
|
||||
{% endif %}
|
||||
<div class="card-body max-w-[95vw]">
|
||||
<h2 class="card-title truncate">
|
||||
{% if text_content.url_info %}
|
||||
|
||||
@@ -6,6 +6,11 @@
|
||||
{% if text_content.url_info.image_id %}
|
||||
<img class="rounded-t-md overflow-clip" src="/file/{{text_content.url_info.image_id}}" alt="Screenshot of the site" />
|
||||
{% endif %}
|
||||
{% if text_content.file_info.mime_type == "image/png" or text_content.file_info.mime_type == "image/jpeg" %}
|
||||
<figure>
|
||||
<img src="/file/{{text_content.file_info.id}}" alt="{{text_content.file_info.file_name}}" />
|
||||
</figure>
|
||||
{% endif %}
|
||||
<div id="reader-{{text_content.id}}" class="markdown-content prose" data-content="{{text_content.text | escape }}">
|
||||
{{text_content.text | escape }}
|
||||
</div>
|
||||
|
||||
@@ -22,9 +22,9 @@ text-splitter = { workspace = true }
|
||||
url = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
headless_chrome = { workspace = true }
|
||||
base64 = { workspace = true }
|
||||
|
||||
common = { path = "../common" }
|
||||
composite-retrieval = { path = "../composite-retrieval" }
|
||||
|
||||
[features]
|
||||
docker = []
|
||||
|
||||
@@ -59,7 +59,8 @@ impl IngestionPipeline {
|
||||
)
|
||||
.await?;
|
||||
|
||||
let text_content = to_text_content(task.content, &self.db, &self.config).await?;
|
||||
let text_content =
|
||||
to_text_content(task.content, &self.db, &self.config, &self.openai_client).await?;
|
||||
|
||||
match self.process(&text_content).await {
|
||||
Ok(_) => {
|
||||
|
||||
@@ -22,10 +22,13 @@ use std::io::{Seek, SeekFrom};
|
||||
use tempfile::NamedTempFile;
|
||||
use tracing::{error, info};
|
||||
|
||||
use crate::utils::image_parsing::extract_text_from_image;
|
||||
|
||||
pub async fn to_text_content(
|
||||
ingestion_payload: IngestionPayload,
|
||||
db: &SurrealDbClient,
|
||||
config: &AppConfig,
|
||||
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
|
||||
) -> Result<TextContent, AppError> {
|
||||
match ingestion_payload {
|
||||
IngestionPayload::Url {
|
||||
@@ -67,7 +70,7 @@ pub async fn to_text_content(
|
||||
category,
|
||||
user_id,
|
||||
} => {
|
||||
let text = extract_text_from_file(&file_info).await?;
|
||||
let text = extract_text_from_file(&file_info, db, openai_client).await?;
|
||||
Ok(TextContent::new(
|
||||
text,
|
||||
Some(context),
|
||||
@@ -195,7 +198,11 @@ async fn fetch_article_from_url(
|
||||
}
|
||||
|
||||
/// Extracts text from a file based on its MIME type.
|
||||
async fn extract_text_from_file(file_info: &FileInfo) -> Result<String, AppError> {
|
||||
async fn extract_text_from_file(
|
||||
file_info: &FileInfo,
|
||||
db_client: &SurrealDbClient,
|
||||
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
|
||||
) -> Result<String, AppError> {
|
||||
match file_info.mime_type.as_str() {
|
||||
"text/plain" => {
|
||||
// Read the file and return its content
|
||||
@@ -212,8 +219,9 @@ async fn extract_text_from_file(file_info: &FileInfo) -> Result<String, AppError
|
||||
Err(AppError::NotFound(file_info.mime_type.clone()))
|
||||
}
|
||||
"image/png" | "image/jpeg" => {
|
||||
// TODO: Implement OCR on image using a crate like `tesseract`
|
||||
Err(AppError::NotFound(file_info.mime_type.clone()))
|
||||
let content =
|
||||
extract_text_from_image(&file_info.path, db_client, openai_client).await?;
|
||||
Ok(content)
|
||||
}
|
||||
"application/octet-stream" => {
|
||||
let content = tokio::fs::read_to_string(&file_info.path).await?;
|
||||
|
||||
62
ingestion-pipeline/src/utils/image_parsing.rs
Normal file
62
ingestion-pipeline/src/utils/image_parsing.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
use async_openai::types::{
|
||||
ChatCompletionRequestMessageContentPartImageArgs,
|
||||
ChatCompletionRequestMessageContentPartTextArgs, ChatCompletionRequestUserMessageArgs,
|
||||
CreateChatCompletionRequestArgs, ImageDetail, ImageUrlArgs,
|
||||
};
|
||||
use base64::{engine::general_purpose::STANDARD, Engine as _};
|
||||
use common::{
|
||||
error::AppError,
|
||||
storage::{db::SurrealDbClient, types::system_settings::SystemSettings},
|
||||
};
|
||||
|
||||
pub async fn extract_text_from_image(
|
||||
path: &str,
|
||||
db: &SurrealDbClient,
|
||||
client: &async_openai::Client<async_openai::config::OpenAIConfig>,
|
||||
) -> Result<String, AppError> {
|
||||
let system_settings = SystemSettings::get_current(db).await?;
|
||||
let image_bytes = tokio::fs::read(&path).await?;
|
||||
|
||||
let base64_image = STANDARD.encode(&image_bytes);
|
||||
|
||||
let image_url = format!("data:image/png;base64,{}", base64_image);
|
||||
|
||||
let request = CreateChatCompletionRequestArgs::default()
|
||||
.model(system_settings.processing_model)
|
||||
.max_tokens(6400_u32)
|
||||
.messages([ChatCompletionRequestUserMessageArgs::default()
|
||||
.content(vec![
|
||||
ChatCompletionRequestMessageContentPartTextArgs::default()
|
||||
.text(r#"Analyze this image and respond based on its primary content:
|
||||
- If the image is mainly text (document, screenshot, sign), transcribe the text verbatim.
|
||||
- If the image is mainly visual (photograph, art, landscape), provide a concise description of the scene.
|
||||
- For hybrid images (diagrams, ads), briefly describe the visual, then transcribe the text under a "Text:" heading.
|
||||
|
||||
Respond directly with the analysis."#)
|
||||
.build()?
|
||||
.into(),
|
||||
ChatCompletionRequestMessageContentPartImageArgs::default()
|
||||
.image_url(
|
||||
ImageUrlArgs::default()
|
||||
.url(image_url)
|
||||
.detail(ImageDetail::High)
|
||||
.build()?,
|
||||
)
|
||||
.build()?
|
||||
.into(),
|
||||
])
|
||||
.build()?
|
||||
.into()])
|
||||
.build()?;
|
||||
|
||||
let response = client.chat().create(request).await?;
|
||||
|
||||
let description = response
|
||||
.choices
|
||||
.get(0)
|
||||
.and_then(|c| c.message.content.as_ref())
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "No description found.".to_string());
|
||||
|
||||
Ok(description)
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod image_parsing;
|
||||
pub mod llm_instructions;
|
||||
|
||||
use common::error::AppError;
|
||||
|
||||
Reference in New Issue
Block a user