llm analysis wip

This commit is contained in:
Per Stark
2024-09-30 20:51:42 +02:00
parent f3ad3e1893
commit dcb82ca454
10 changed files with 555 additions and 155 deletions

View File

@@ -1,4 +1,7 @@
use async_openai::types::{ ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage, CreateChatCompletionRequestArgs};
use serde::{Deserialize, Serialize};
use serde_json::json;
use tracing::info;
use crate::models::file_info::FileInfo;
use thiserror::Error;
@@ -11,6 +14,31 @@ pub struct TextContent {
pub category: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct LLMAnalysis {
pub json_ld: serde_json::Value,
pub description: String,
pub related_category: String,
pub instructions: String,
}
/// Error types for processing `TextContent`.
#[derive(Error, Debug)]
pub enum ProcessingError {
#[error("LLM processing error: {0}")]
LLMError(String),
#[error("Graph DB storage error: {0}")]
GraphDBError(String),
#[error("Vector DB storage error: {0}")]
VectorDBError(String),
#[error("Unknown processing error")]
Unknown,
}
impl TextContent {
/// Creates a new `TextContent` instance.
pub fn new(text: String, file_info: Option<FileInfo>, instructions: String, category: String) -> Self {
@@ -26,42 +54,101 @@ impl TextContent {
pub async fn process(&self) -> Result<(), ProcessingError> {
// Step 1: Send to LLM for analysis
let analysis = self.send_to_llm().await?;
info!("{:?}", analysis);
// Step 2: Store analysis results in Graph DB
self.store_in_graph_db(&analysis).await?;
// self.store_in_graph_db(&analysis).await?;
// Step 3: Split text and store in Vector DB
self.store_in_vector_db().await?;
// self.store_in_vector_db().await?;
Ok(())
}
/// Sends text to an LLM for analysis.
async fn send_to_llm(&self) -> Result<LLMAnalysis, ProcessingError> {
// TODO: Implement interaction with your specific LLM API.
// Example using reqwest:
/*
let client = reqwest::Client::new();
let response = client.post("http://llm-api/analyze")
.json(&serde_json::json!({ "text": self.text }))
.send()
.await
.map_err(|e| ProcessingError::LLMError(e.to_string()))?;
if !response.status().is_success() {
return Err(ProcessingError::LLMError(format!("LLM API returned status: {}", response.status())));
let client = async_openai::Client::new();
// Define the JSON Schema for the expected response
let schema = json!({
"type": "object",
"properties": {
"json_ld": {
"type": "object",
"properties": {
"@context": { "type": "string" },
"@type": { "type": "string" },
"name": { "type": "string" }
// Define only the essential properties
},
"required": ["@context", "@type", "name"],
"additionalProperties": false
},
"description": { "type": "string" },
"related_category": { "type": "string" },
"instructions": { "type": "string" }
},
"required": ["json_ld", "description", "related_category", "instructions"],
"additionalProperties": false
});
let response_format = async_openai::types::ResponseFormat::JsonSchema {
json_schema: async_openai::types::ResponseFormatJsonSchema {
description: Some("Structured analysis of the submitted content".into()),
name: "content_analysis".into(),
schema: Some(schema),
strict: Some(true),
},
};
// Construct the system and user messages
let system_message = format!(
"You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON-LD object representing the content, a short description of the document, how it relates to the submitted category, and any relevant instructions."
);
let user_message = format!(
"Category: {}\nInstructions: {}\nContent:\n{}",
self.category, self.instructions, self.text
);
// Build the chat completion request
let request = CreateChatCompletionRequestArgs::default()
.model("gpt-4o-mini")
.max_tokens(1024u32)
.messages([
ChatCompletionRequestSystemMessage::from(system_message).into(),
ChatCompletionRequestUserMessage::from(user_message).into(),
])
.response_format(response_format)
.build().map_err(|e| ProcessingError::LLMError(e.to_string()))?;
// Send the request to OpenAI
let response = client.chat().create(request).await.map_err(|e| {
ProcessingError::LLMError(format!("OpenAI API request failed: {}", e.to_string()))
})?;
info!("{:?}", response);
// Extract and parse the response
for choice in response.choices {
if let Some(content) = choice.message.content {
let analysis: LLMAnalysis = serde_json::from_str(&content).map_err(|e| {
ProcessingError::LLMError(format!(
"Failed to parse LLM response into LLMAnalysis: {}",
e.to_string()
))
})?;
return Ok(analysis);
}
}
let analysis: LLMAnalysis = response.json().await
.map_err(|e| ProcessingError::LLMError(e.to_string()))?;
Ok(analysis)
*/
unimplemented!()
Err(ProcessingError::LLMError(
"No content found in LLM response".into(),
))
}
/// Stores analysis results in a graph database.
async fn store_in_graph_db(&self, analysis: &LLMAnalysis) -> Result<(), ProcessingError> {
async fn store_in_graph_db(&self, _analysis: &LLMAnalysis) -> Result<(), ProcessingError> {
// TODO: Implement storage logic for your specific graph database.
// Example:
/*
@@ -85,28 +172,3 @@ impl TextContent {
unimplemented!()
}
}
/// Represents the analysis results from the LLM.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LLMAnalysis {
pub entities: Vec<String>,
pub summary: String,
// Add other fields based on your LLM's output.
}
/// Error types for processing `TextContent`.
#[derive(Error, Debug)]
pub enum ProcessingError {
#[error("LLM processing error: {0}")]
LLMError(String),
#[error("Graph DB storage error: {0}")]
GraphDBError(String),
#[error("Vector DB storage error: {0}")]
VectorDBError(String),
#[error("Unknown processing error")]
Unknown,
}