mirror of
https://github.com/perstarkse/minne.git
synced 2026-04-25 10:18:38 +02:00
feat: handles submitted audio
This commit is contained in:
@@ -22,7 +22,7 @@ use std::io::{Seek, SeekFrom};
|
||||
use tempfile::NamedTempFile;
|
||||
use tracing::{error, info};
|
||||
|
||||
use crate::utils::image_parsing::extract_text_from_image;
|
||||
use crate::utils::{audio_transcription::transcribe_audio_file, image_parsing::extract_text_from_image};
|
||||
|
||||
pub async fn to_text_content(
|
||||
ingestion_payload: IngestionPayload,
|
||||
@@ -231,6 +231,10 @@ async fn extract_text_from_file(
|
||||
let content = tokio::fs::read_to_string(&file_info.path).await?;
|
||||
Ok(content)
|
||||
}
|
||||
"audio/mpeg" | "audio/mp3" | "audio/wav" | "audio/x-wav" | "audio/webm" | "audio/mp4" | "audio/ogg" | "audio/flac" => {
|
||||
|
||||
transcribe_audio_file(&file_info.path, db_client, openai_client).await
|
||||
}
|
||||
// Handle other MIME types as needed
|
||||
_ => Err(AppError::NotFound(file_info.mime_type.clone())),
|
||||
}
|
||||
|
||||
32
ingestion-pipeline/src/utils/audio_transcription.rs
Normal file
32
ingestion-pipeline/src/utils/audio_transcription.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
use async_openai::types::{CreateTranscriptionRequestArgs, AudioResponseFormat};
|
||||
use common::{
|
||||
error::AppError,
|
||||
storage::{
|
||||
db::SurrealDbClient,
|
||||
types::system_settings::SystemSettings,
|
||||
},
|
||||
};
|
||||
|
||||
/// Transcribes an audio file using the configured OpenAI Whisper model.
|
||||
pub async fn transcribe_audio_file(
|
||||
file_path: &str,
|
||||
db_client: &SurrealDbClient,
|
||||
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
|
||||
) -> Result<String, AppError> {
|
||||
let system_settings = SystemSettings::get_current(db_client).await?;
|
||||
let model = system_settings.voice_processing_model;
|
||||
|
||||
let request = CreateTranscriptionRequestArgs::default()
|
||||
.file(file_path)
|
||||
.model(model)
|
||||
.response_format(AudioResponseFormat::Json)
|
||||
.build()?;
|
||||
|
||||
let response = openai_client
|
||||
.audio()
|
||||
.transcribe(request)
|
||||
.await
|
||||
.map_err(|e| AppError::Processing(format!("Audio transcription failed: {}", e)))?;
|
||||
Ok(response.text)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
pub mod image_parsing;
|
||||
pub mod llm_instructions;
|
||||
pub mod audio_transcription;
|
||||
|
||||
use common::error::AppError;
|
||||
use std::collections::HashMap;
|
||||
|
||||
Reference in New Issue
Block a user