Move the LLM request out of TextContent into a separate utils::llm module

2026-05-13 03:10:33 +02:00 · 2024-10-08 07:54:08 +02:00
parent 43e5d4d629
commit b5f9317634
3 changed files with 104 additions and 97 deletions
--- a/src/models/text_content.rs
+++ b/src/models/text_content.rs
@@ -2,7 +2,7 @@ use async_openai::types::{ ChatCompletionRequestSystemMessage, ChatCompletionReq
 use serde::{Deserialize, Serialize};
 use serde_json::json;
 use tracing::info;
-use crate::models::file_info::FileInfo;
+use crate::{models::file_info::FileInfo, utils::llm::create_json_ld};
 use thiserror::Error;

 /// Represents a single piece of text content extracted from various sources.
@@ -71,7 +71,7 @@ impl TextContent {
    /// Processes the `TextContent` by sending it to an LLM, storing in a graph DB, and vector DB.
    pub async fn process(&self) -> Result<(), ProcessingError> {
        // Step 1: Send to LLM for analysis
-        let analysis = self.send_to_llm().await?;
+        let analysis = create_json_ld(&self.category, &self.instructions, &self.text).await?;
        info!("{:?}", analysis);

        // Step 2: Store analysis results in Graph DB
@@ -83,100 +83,6 @@ impl TextContent {
        Ok(())
    }

-    /// Sends text to an LLM for analysis.
-    async fn send_to_llm(&self) -> Result<AnalysisResult, ProcessingError> {
-        let client = async_openai::Client::new();
-        let  schema = json!({
-          "type": "object",
-          "properties": {
-            "knowledge_sources": {
-              "type": "array",
-              "items": {
-                "type": "object",
-                "properties": {
-                  "id": {"type": "string"},
-                  "type": {"type": "string", "enum": ["Document", "Page", "TextSnippet"]},
-                  "title": {"type": "string"},
-                  "description": {"type": "string"},
-                  "relationships": {
-                    "type": "array",
-                    "items": {
-                      "type": "object",
-                      "properties": {
-                        "type": {"type": "string", "enum": ["RelatedTo", "RelevantTo", "SimilarTo"]},
-                        "target": {"type": "string", "description": "ID of the related knowledge source"}
-                      },
-                      "required": ["type", "target"],
-                      "additionalProperties": false,
-                    }
-                  }
-                },
-                "required": ["id", "type", "title", "description", "relationships"],
-                "additionalProperties": false,
-              }
-            },
-            "category": {"type": "string"},
-            "instructions": {"type": "string"}
-          },
-          "required": ["knowledge_sources", "category", "instructions"],
-          "additionalProperties": false
-        });
-
-        let response_format = async_openai::types::ResponseFormat::JsonSchema {
-            json_schema: async_openai::types::ResponseFormatJsonSchema {
-                description: Some("Structured analysis of the submitted content".into()),
-                name: "content_analysis".into(),
-                schema: Some(schema),
-                strict: Some(true),
-            },
-        };
-
-        // Construct the system and user messages
-        let system_message = format!(
-            "You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON-LD object representing the content, a short description of the document, how it relates to the submitted category, and any relevant instructions."
-        );
-
-        let user_message = format!(
-            "Category: {}\nInstructions: {}\nContent:\n{}",
-            self.category, self.instructions, self.text
-        );
-
-        // Build the chat completion request
-        let request = CreateChatCompletionRequestArgs::default()
-            .model("gpt-4o-mini") 
-            .max_tokens(2048u32)
-            .messages([
-                ChatCompletionRequestSystemMessage::from(system_message).into(),
-                ChatCompletionRequestUserMessage::from(user_message).into(),
-            ])
-            .response_format(response_format)
-            .build().map_err(|e| ProcessingError::LLMError(e.to_string()))?;
-
-        // Send the request to OpenAI
-        let response = client.chat().create(request).await.map_err(|e| {
-            ProcessingError::LLMError(format!("OpenAI API request failed: {}", e.to_string()))
-        })?;
-
-        info!("{:?}", response);
-
-        // Extract and parse the response
-        for choice in response.choices {
-            if let Some(content) = choice.message.content {
-                let analysis: AnalysisResult = serde_json::from_str(&content).map_err(|e| {
-                    ProcessingError::LLMError(format!(
-                        "Failed to parse LLM response into LLMAnalysis: {}",
-                        e.to_string()
-                    ))
-                })?;
-                return Ok(analysis);
-            }
-        }
-
-        Err(ProcessingError::LLMError(
-            "No content found in LLM response".into(),
-        ))
-    }
-
    /// Stores analysis results in a graph database.
    async fn store_in_graph_db(&self, _analysis: &AnalysisResult) -> Result<(), ProcessingError> {
        // TODO: Implement storage logic for your specific graph database.
--- a/src/utils/llm.rs
+++ b/src/utils/llm.rs
@@ -0,0 +1,101 @@
+use async_openai::types::ChatCompletionRequestSystemMessage;
+use async_openai::types::ChatCompletionRequestUserMessage;
+use async_openai::types::CreateChatCompletionRequestArgs;
+use tracing::info;
+use crate::models::text_content::ProcessingError;
+use serde_json::json;
+use crate::models::text_content::AnalysisResult;
+
+/// Sends text to an LLM for analysis.
+pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> Result<AnalysisResult, ProcessingError> {
+        let client = async_openai::Client::new();
+        let  schema = json!({
+          "type": "object",
+          "properties": {
+            "knowledge_sources": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "id": {"type": "string"},
+                  "type": {"type": "string", "enum": ["Document", "Page", "TextSnippet"]},
+                  "title": {"type": "string"},
+                  "description": {"type": "string"},
+                  "relationships": {
+                    "type": "array",
+                    "items": {
+                      "type": "object",
+                      "properties": {
+                        "type": {"type": "string", "enum": ["RelatedTo", "RelevantTo", "SimilarTo"]},
+                        "target": {"type": "string", "description": "ID of the related knowledge source"}
+                      },
+                      "required": ["type", "target"],
+                      "additionalProperties": false,
+                    }
+                  }
+                },
+                "required": ["id", "type", "title", "description", "relationships"],
+                "additionalProperties": false,
+              }
+            },
+            "category": {"type": "string"},
+            "instructions": {"type": "string"}
+          },
+          "required": ["knowledge_sources", "category", "instructions"],
+          "additionalProperties": false
+        });
+
+        let response_format = async_openai::types::ResponseFormat::JsonSchema {
+            json_schema: async_openai::types::ResponseFormatJsonSchema {
+                description: Some("Structured analysis of the submitted content".into()),
+                name: "content_analysis".into(),
+                schema: Some(schema),
+                strict: Some(true),
+            },
+        };
+
+        // Construct the system and user messages
+        let system_message = format!(
+            "You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON-LD object representing the content, a moderately short description of the document, how it relates to the submitted category and any relevant instructions. You shall also include related objects. The goal is to insert your output into a graph database."
+        );
+
+        let user_message = format!(
+            "Category: {}\nInstructions: {}\nContent:\n{}",
+            category, instructions, text
+        );
+
+        // Build the chat completion request
+        let request = CreateChatCompletionRequestArgs::default()
+            .model("gpt-4o-mini") 
+            .max_tokens(2048u32)
+            .messages([
+                ChatCompletionRequestSystemMessage::from(system_message).into(),
+                ChatCompletionRequestUserMessage::from(user_message).into(),
+            ])
+            .response_format(response_format)
+            .build().map_err(|e| ProcessingError::LLMError(e.to_string()))?;
+
+        // Send the request to OpenAI
+        let response = client.chat().create(request).await.map_err(|e| {
+            ProcessingError::LLMError(format!("OpenAI API request failed: {}", e.to_string()))
+        })?;
+
+        info!("{:?}", response);
+
+        // Extract and parse the response
+        for choice in response.choices {
+            if let Some(content) = choice.message.content {
+                let analysis: AnalysisResult = serde_json::from_str(&content).map_err(|e| {
+                    ProcessingError::LLMError(format!(
+                        "Failed to parse LLM response into LLMAnalysis: {}",
+                        e.to_string()
+                    ))
+                })?;
+                return Ok(analysis);
+            }
+        }
+
+        Err(ProcessingError::LLMError(
+            "No content found in LLM response".into(),
+        ))
+    }
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -1 +1 @@
-
+pub mod llm;