feat: customizable data storage path

This commit is contained in:
Per Stark
2025-05-09 23:28:36 +02:00
parent 89badb3bed
commit c49005c258
16 changed files with 261 additions and 40 deletions
+9 -3
View File
@@ -16,7 +16,7 @@ use common::{
text_content::TextContent,
},
},
utils::embedding::generate_embedding,
utils::{config::AppConfig, embedding::generate_embedding},
};
use crate::{
@@ -27,14 +27,20 @@ use crate::{
/// Bundles the shared handles and configuration that the ingestion
/// pipeline's methods operate on.
pub struct IngestionPipeline {
// Shared (reference-counted) handle to the SurrealDB client.
db: Arc<SurrealDbClient>,
// Shared (reference-counted) OpenAI API client.
openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
// Application configuration; `process_task` passes a reference to it
// into `to_text_content`.
config: AppConfig,
}
impl IngestionPipeline {
/// Creates an `IngestionPipeline` from an already-constructed database
/// handle, OpenAI client and application configuration.
///
/// The arguments are moved into the new value unchanged. The visible body
/// neither awaits anything nor produces an `Err`; the `async` /
/// `Result` signature is kept as-is.
pub async fn new(
db: Arc<SurrealDbClient>,
openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
) -> Result<Self, AppError> {
// NOTE(review): the next line looks like the pre-change side of the diff
// (it omits `config`); the struct literal below it appears to be its
// replacement — confirm against the actual file.
Ok(Self { db, openai_client })
Ok(Self {
db,
openai_client,
config,
})
}
pub async fn process_task(&self, task: IngestionTask) -> Result<(), AppError> {
let current_attempts = match task.status {
@@ -53,7 +59,7 @@ impl IngestionPipeline {
)
.await?;
let text_content = to_text_content(task.content, &self.db).await?;
let text_content = to_text_content(task.content, &self.db, &self.config).await?;
match self.process(&text_content).await {
Ok(_) => {
+5 -2
View File
@@ -14,6 +14,7 @@ use common::{
ingestion_payload::IngestionPayload,
text_content::{TextContent, UrlInfo},
},
utils::config::AppConfig,
};
use dom_smoothie::{Article, Readability, TextMode};
use headless_chrome::Browser;
@@ -24,6 +25,7 @@ use tracing::{error, info};
pub async fn to_text_content(
ingestion_payload: IngestionPayload,
db: &SurrealDbClient,
config: &AppConfig,
) -> Result<TextContent, AppError> {
match ingestion_payload {
IngestionPayload::Url {
@@ -32,7 +34,7 @@ pub async fn to_text_content(
category,
user_id,
} => {
let (article, file_info) = fetch_article_from_url(&url, db, &user_id).await?;
let (article, file_info) = fetch_article_from_url(&url, db, &user_id, &config).await?;
Ok(TextContent::new(
article.text_content.into(),
Some(context),
@@ -101,6 +103,7 @@ async fn fetch_article_from_url(
url: &str,
db: &SurrealDbClient,
user_id: &str,
config: &AppConfig,
) -> Result<(Article, FileInfo), AppError> {
info!("Fetching URL: {}", url);
// Instantiate timer
@@ -173,7 +176,7 @@ async fn fetch_article_from_url(
};
// Store screenshot
let file_info = FileInfo::new(field_data, db, user_id).await?;
let file_info = FileInfo::new(field_data, db, user_id, &config).await?;
// Parse content...
let config = dom_smoothie::Config {