From 33300d31932e5f2ebc01aa55a7bdc0c8455ca9ac Mon Sep 17 00:00:00 2001
From: Per Stark
Date: Tue, 29 Jul 2025 18:39:26 +0200
Subject: [PATCH] feat: handles submitted audio
---
README.md | 10 +++---
..._processing_model_to_system_settings.surql | 5 +++
...e_processing_model_to_system_settings.json | 1 +
common/schemas/system_settings.surql | 1 +
common/src/storage/types/system_settings.rs | 1 +
html-router/src/routes/admin/handlers.rs | 2 ++
html-router/templates/admin/base.html | 16 ++++++++++
ingestion-pipeline/src/types/mod.rs | 6 +++-
.../src/utils/audio_transcription.rs | 32 +++++++++++++++++++
ingestion-pipeline/src/utils/mod.rs | 1 +
10 files changed, 69 insertions(+), 6 deletions(-)
create mode 100644 common/migrations/20250701_000000_add_voice_processing_model_to_system_settings.surql
create mode 100644 common/migrations/definitions/20250701_000000_add_voice_processing_model_to_system_settings.json
create mode 100644 ingestion-pipeline/src/utils/audio_transcription.rs
diff --git a/README.md b/README.md
index c31cd05..6dd9811 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
## Demo deployment
-To test *Minne* out, enter [this](https://minne-demo.stark.pub) read-only demo deployment to view and test functionality out.
+To try _Minne_ out, visit [this](https://minne-demo.stark.pub) read-only demo deployment to view and test its functionality.
## The "Why" Behind Minne
@@ -70,7 +70,7 @@ This is a great way to manage Minne and its SurrealDB dependency together.
1. Create a `docker-compose.yml` file:
```yaml
- version: '3.8'
+ version: "3.8"
services:
minne:
image: ghcr.io/perstarkse/minne:latest # Pulls the latest pre-built image
@@ -88,7 +88,7 @@ This is a great way to manage Minne and its SurrealDB dependency together.
SURREALDB_DATABASE: "minne_db"
SURREALDB_NAMESPACE: "minne_ns"
OPENAI_API_KEY: "your_openai_api_key_here" # IMPORTANT: Replace with your actual key
- #OPENAI_BASE_URL: "your_ollama_address" # Uncomment this and change it to override the default openai base url
+ #OPENAI_BASE_URL: "your_ollama_address" # Uncomment this and change it to override the default openai base url
HTTP_PORT: 3000
DATA_DIR: "/data" # Data directory inside the container
RUST_LOG: "minne=info,tower_http=info" # Example logging level
@@ -177,7 +177,7 @@ Binaries for Windows, macOS, and Linux (combined `main` version) are available o
```bash
cargo run --release --bin worker
```
- The compiled binaries will be in `target/release/`.
+ The compiled binaries will be in `target/release/`.
## Configuration
@@ -229,7 +229,7 @@ Once Minne is running:
1. Access the web interface at `http://localhost:3000` (or your configured port).
1. On iOS, consider setting up the [Minne iOS Shortcut](https://www.icloud.com/shortcuts/9aa960600ec14329837ba4169f57a166) for effortless content sending. **Add the shortcut, replace the [insert_url] and the [insert_api_key] snippets**.
-1. Start adding notes, URLs and explore your growing knowledge graph.
+1. Add notes, URLs, and **audio files**, then explore your growing knowledge graph.
1. Engage with the chat interface to query your saved content.
1. Try the experimental visual graph explorer to see connections.
diff --git a/common/migrations/20250701_000000_add_voice_processing_model_to_system_settings.surql b/common/migrations/20250701_000000_add_voice_processing_model_to_system_settings.surql
new file mode 100644
index 0000000..92eda2d
--- /dev/null
+++ b/common/migrations/20250701_000000_add_voice_processing_model_to_system_settings.surql
@@ -0,0 +1,5 @@
+DEFINE FIELD IF NOT EXISTS voice_processing_model ON system_settings TYPE string;
+
+UPDATE system_settings:current SET
+ voice_processing_model = "whisper-1"
+WHERE voice_processing_model == NONE;
\ No newline at end of file
diff --git a/common/migrations/definitions/20250701_000000_add_voice_processing_model_to_system_settings.json b/common/migrations/definitions/20250701_000000_add_voice_processing_model_to_system_settings.json
new file mode 100644
index 0000000..b948e59
--- /dev/null
+++ b/common/migrations/definitions/20250701_000000_add_voice_processing_model_to_system_settings.json
@@ -0,0 +1 @@
+{"schemas":"--- original\n+++ modified\n@@ -160,6 +160,7 @@\n DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS voice_processing_model ON system_settings TYPE string;\n\n # Defines the schema for the 'text_chunk' table.\n\n","events":null}
\ No newline at end of file
diff --git a/common/schemas/system_settings.surql b/common/schemas/system_settings.surql
index 71254bd..e53a75e 100644
--- a/common/schemas/system_settings.surql
+++ b/common/schemas/system_settings.surql
@@ -13,3 +13,4 @@ DEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;
DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;
DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;
+DEFINE FIELD IF NOT EXISTS voice_processing_model ON system_settings TYPE string;
diff --git a/common/src/storage/types/system_settings.rs b/common/src/storage/types/system_settings.rs
index 9a28655..04c9e0b 100644
--- a/common/src/storage/types/system_settings.rs
+++ b/common/src/storage/types/system_settings.rs
@@ -17,6 +17,7 @@ pub struct SystemSettings {
pub ingestion_system_prompt: String,
pub image_processing_model: String,
pub image_processing_prompt: String,
+ pub voice_processing_model: String,
}
impl StoredObject for SystemSettings {
diff --git a/html-router/src/routes/admin/handlers.rs b/html-router/src/routes/admin/handlers.rs
index 53f50ca..1c03b94 100644
--- a/html-router/src/routes/admin/handlers.rs
+++ b/html-router/src/routes/admin/handlers.rs
@@ -128,6 +128,7 @@ pub struct ModelSettingsInput {
query_model: String,
processing_model: String,
image_processing_model: String,
+ voice_processing_model: String,
embedding_model: String,
embedding_dimensions: Option,
}
@@ -159,6 +160,7 @@ pub async fn update_model_settings(
query_model: input.query_model,
processing_model: input.processing_model,
image_processing_model: input.image_processing_model,
+ voice_processing_model: input.voice_processing_model,
embedding_model: input.embedding_model,
// Use new dimensions if provided, otherwise retain the current ones.
embedding_dimensions: input
diff --git a/html-router/templates/admin/base.html b/html-router/templates/admin/base.html
index a158a8a..58e64b5 100644
--- a/html-router/templates/admin/base.html
+++ b/html-router/templates/admin/base.html
@@ -107,6 +107,22 @@
+
+
+