llm response enforced

This commit is contained in:
Per Stark
2024-10-18 08:10:05 +02:00
parent 06b70d7d4e
commit f860e8ff86
4 changed files with 199 additions and 28 deletions

View File

@@ -0,0 +1,103 @@
use serde::Deserialize;
use serde::Serialize;
use uuid::Uuid;
/// Represents a generic knowledge entity in the graph.
///
/// Derives `Serialize`/`Deserialize`, so field names and order are part of
/// the serialized form.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct KnowledgeEntity {
/// Unique identifier, generated in Rust.
pub id: Uuid,
/// Name of the entity.
pub name: String,
/// Free-text description of the entity.
pub description: String,
/// Category the entity belongs to.
pub entity_type: KnowledgeEntityType,
/// Optional link to the `FileInfo` or `TextContent` this entity came from.
pub source_id: Option<Uuid>,
/// Optional additional metadata as arbitrary JSON.
pub metadata: Option<serde_json::Value>,
}
/// Category of a [`KnowledgeEntity`].
///
/// The enum is fieldless, so besides `Clone` it also derives `Copy`,
/// `PartialEq`, `Eq` and `Hash`: values can be compared directly, passed by
/// value, and used as keys in hashed collections.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KnowledgeEntityType {
    Idea,
    Project,
    Document,
    Page,
    TextSnippet,
    // Add more types as needed
}
impl From<String> for KnowledgeEntityType {
fn from(s: String) -> Self {
match s.to_lowercase().as_str() {
"idea" => KnowledgeEntityType::Idea,
"project" => KnowledgeEntityType::Project,
"document" => KnowledgeEntityType::Document,
"page" => KnowledgeEntityType::Page,
"textsnippet" => KnowledgeEntityType::TextSnippet,
_ => KnowledgeEntityType::Document, // Default case
}
}
}
/// Represents a relationship between two knowledge entities.
///
/// Derives `Serialize`/`Deserialize`; note that `in_` is serialized under the
/// key `in`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Relationship {
/// Unique identifier, generated in Rust.
pub id: Uuid,
/// ID of the target `KnowledgeEntity`. Serialized as `in`; the trailing
/// underscore only exists because `in` is a Rust keyword.
#[serde(rename = "in")]
pub in_: Uuid,
/// ID of the source `KnowledgeEntity`.
pub out: Uuid,
/// Kind of relationship, e.g. `RelatedTo`, `RelevantTo`.
pub relationship_type: String,
/// Optional additional metadata as arbitrary JSON.
pub metadata: Option<serde_json::Value>,
}
use std::collections::HashMap;
use crate::utils::llm::LLMKnowledgeEntity;
use crate::utils::llm::LLMRelationship;
/// Intermediate struct to hold mapping between LLM keys and generated IDs.
///
/// An empty mapper can be obtained through either [`GraphMapper::new`] or
/// [`Default::default`].
#[derive(Debug, Default)]
pub struct GraphMapper {
    /// Generated UUID for every key that has been assigned so far.
    pub key_to_id: HashMap<String, Uuid>,
}

impl GraphMapper {
    /// Creates a mapper with no key assignments.
    pub fn new() -> Self {
        Self::default()
    }

    /// Assigns a new UUID for a given key.
    ///
    /// A fresh v4 UUID is generated on every call and returned. If `key` was
    /// already assigned, the previously stored ID is replaced by the new one.
    pub fn assign_id(&mut self, key: &str) -> Uuid {
        let id = Uuid::new_v4();
        self.key_to_id.insert(key.to_owned(), id);
        id
    }

    /// Retrieves the UUID for a given key.
    ///
    /// Returns `None` when no ID has been assigned to `key`.
    pub fn get_id(&self, key: &str) -> Option<&Uuid> {
        self.key_to_id.get(key)
    }
}
impl From<&LLMKnowledgeEntity> for KnowledgeEntity {
fn from(llm_entity: &LLMKnowledgeEntity) -> Self {
KnowledgeEntity {
id: Uuid::new_v4(),
name: llm_entity.name.clone(),
description: llm_entity.description.clone(),
entity_type: KnowledgeEntityType::from(llm_entity.entity_type.clone()),
source_id: None, // To be linked externally if needed
metadata: None, // Populate if metadata is provided
}
}
}
impl From<&LLMRelationship> for Relationship {
fn from(llm_rel: &LLMRelationship) -> Self {
Relationship {
id: Uuid::new_v4(),
in_: Uuid::nil(), // Placeholder; to be set after mapping
out: Uuid::nil(), // Placeholder; to be set after mapping
relationship_type: llm_rel.type_.clone(),
metadata: None, // Populate if metadata is provided
}
}
}

View File

@@ -1,4 +1,5 @@
pub mod file_info;
pub mod graph_entities;
pub mod ingress_content;
pub mod ingress_object;
pub mod text_content;

View File

@@ -74,7 +74,7 @@ impl TextContent {
// Step 1: Send to LLM for analysis
let analysis = create_json_ld(&self.category, &self.instructions, &self.text).await?;
info!("{:?}", &analysis);
info!("{:#?}", &analysis);
// Step 2: Store analysis results in Graph DB
// client.store_knowledge_source(&analysis.knowledge_source).await?;

View File

@@ -1,44 +1,79 @@
use async_openai::types::ChatCompletionRequestSystemMessage;
use async_openai::types::ChatCompletionRequestUserMessage;
use async_openai::types::CreateChatCompletionRequestArgs;
use serde::Deserialize;
use serde::Serialize;
use tracing::debug;
use crate::models::text_content::ProcessingError;
use serde_json::json;
use crate::models::text_content::AnalysisResult;
/// Represents a single knowledge entity from the LLM.
///
/// Field names match the `knowledge_entities` item properties of the JSON
/// schema sent with the request.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LLMKnowledgeEntity {
/// Temporary identifier chosen by the LLM; relationships refer to entities
/// by this key.
pub key: String,
/// Name of the entity.
pub name: String,
/// Description of the entity.
pub description: String,
/// Textual entity type; should match the `KnowledgeEntityType` variants.
pub entity_type: String,
}
/// Represents a single relationship from the LLM.
///
/// Endpoints are given as entity keys, not as IDs.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LLMRelationship {
/// Kind of relationship, e.g. `RelatedTo`, `RelevantTo`. Serialized as
/// `type`; the trailing underscore only exists because `type` is a Rust
/// keyword.
#[serde(rename = "type")]
pub type_: String,
/// Key of the source entity.
pub source: String,
/// Key of the target entity.
pub target: String,
}
/// Represents the entire graph analysis result from the LLM.
///
/// This is the type the LLM response content is deserialized into.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LLMGraphAnalysisResult {
/// Entities found by the LLM.
pub knowledge_entities: Vec<LLMKnowledgeEntity>,
/// Relationships between those entities, expressed via entity keys.
pub relationships: Vec<LLMRelationship>,
}
/// Sends text to an LLM for analysis.
pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> Result<AnalysisResult, ProcessingError> {
pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> Result<LLMGraphAnalysisResult, ProcessingError> {
let client = async_openai::Client::new();
let schema = json!({
let schema = json!({
"type": "object",
"properties": {
"knowledge_source": {
"type": "object",
"knowledge_entities": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {"type": "string"},
"type": {"type": "string", "enum": ["Document", "Page", "TextSnippet"]},
"title": {"type": "string"},
"description": {"type": "string"},
"relationships": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {"type": "string", "enum": ["RelatedTo", "RelevantTo", "SimilarTo"]},
"target": {"type": "string", "description": "ID of the related knowledge source"}
},
"required": ["type", "target"],
"additionalProperties": false,
}
"key": { "type": "string" },
"name": { "type": "string" },
"description": { "type": "string" },
"entity_type": {
"type": "string",
"enum": ["idea", "project", "document", "page", "textsnippet"]
}
},
"required": ["id", "type", "title", "description", "relationships"],
"additionalProperties": false,
"required": ["key", "name", "description", "entity_type"],
"additionalProperties": false
}
},
"category": {"type": "string"},
"instructions": {"type": "string"}
"relationships": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": ["RelatedTo", "RelevantTo", "SimilarTo"]
},
"source": { "type": "string" },
"target": { "type": "string" }
},
"required": ["type", "source", "target"],
"additionalProperties": false
}
}
},
"required": ["knowledge_source", "category", "instructions"],
"required": ["knowledge_entities", "relationships"],
"additionalProperties": false
});
@@ -52,7 +87,39 @@ pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> R
};
// Construct the system and user messages
let system_message = "You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON-LD object representing the content, a moderately short description of the document, how it relates to the submitted category and any relevant instructions. You shall also include related objects. The goal is to insert your output into a graph database.".to_string();
let system_message = r#"
You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database.
The JSON should have the following structure:
{
"knowledge_entities": [
{
"key": "unique-key-1",
"name": "Entity Name",
"description": "A detailed description of the entity.",
"entity_type": "TypeOfEntity"
},
// More entities...
],
"relationships": [
{
"type": "RelationshipType",
"source": "unique-key-1",
"target": "unique-key-2"
},
// More relationships...
]
}
Guidelines:
1. Do NOT generate any IDs or UUIDs. Use a unique `key` for each knowledge entity.
2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`.
3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.
4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity.
6. Optionally, add any relevant metadata within each object as needed."#;
let user_message = format!(
"Category: {}\nInstructions: {}\nContent:\n{}",
@@ -80,9 +147,9 @@ pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> R
// Extract and parse the response
for choice in response.choices {
if let Some(content) = choice.message.content {
let analysis: AnalysisResult = serde_json::from_str(&content).map_err(|e| {
let analysis: LLMGraphAnalysisResult = serde_json::from_str(&content).map_err(|e| {
ProcessingError::LLMError(format!(
"Failed to parse LLM response into LLMAnalysis: {}",
"Failed to parse LLM response into analysis: {}",
e
))
})?;