mirror of
https://github.com/perstarkse/minne.git
synced 2026-03-17 23:14:08 +01:00
llm response enforced
This commit is contained in:
103
src/models/graph_entities.rs
Normal file
103
src/models/graph_entities.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Represents a generic knowledge entity in the graph.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct KnowledgeEntity {
|
||||
pub id: Uuid, // Generated in Rust
|
||||
pub name: String,
|
||||
pub description: String,
|
||||
pub entity_type: KnowledgeEntityType,
|
||||
pub source_id: Option<Uuid>, // Links to FileInfo or TextContent
|
||||
pub metadata: Option<serde_json::Value>, // Additional metadata
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub enum KnowledgeEntityType {
|
||||
Idea,
|
||||
Project,
|
||||
Document,
|
||||
Page,
|
||||
TextSnippet,
|
||||
// Add more types as needed
|
||||
}
|
||||
|
||||
impl From<String> for KnowledgeEntityType {
|
||||
fn from(s: String) -> Self {
|
||||
match s.to_lowercase().as_str() {
|
||||
"idea" => KnowledgeEntityType::Idea,
|
||||
"project" => KnowledgeEntityType::Project,
|
||||
"document" => KnowledgeEntityType::Document,
|
||||
"page" => KnowledgeEntityType::Page,
|
||||
"textsnippet" => KnowledgeEntityType::TextSnippet,
|
||||
_ => KnowledgeEntityType::Document, // Default case
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a relationship between two knowledge entities.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct Relationship {
|
||||
pub id: Uuid, // Generated in Rust
|
||||
#[serde(rename = "in")]
|
||||
pub in_: Uuid, // Target KnowledgeEntity ID
|
||||
pub out: Uuid, // Source KnowledgeEntity ID
|
||||
pub relationship_type: String, // e.g., RelatedTo, RelevantTo
|
||||
pub metadata: Option<serde_json::Value>, // Additional metadata
|
||||
}
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::utils::llm::LLMKnowledgeEntity;
|
||||
use crate::utils::llm::LLMRelationship;
|
||||
|
||||
/// Intermediate struct to hold mapping between LLM keys and generated IDs.
|
||||
pub struct GraphMapper {
|
||||
pub key_to_id: HashMap<String, Uuid>,
|
||||
}
|
||||
|
||||
impl GraphMapper {
|
||||
pub fn new() -> Self {
|
||||
GraphMapper {
|
||||
key_to_id: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Assigns a new UUID for a given key.
|
||||
pub fn assign_id(&mut self, key: &str) -> Uuid {
|
||||
let id = Uuid::new_v4();
|
||||
self.key_to_id.insert(key.to_string(), id);
|
||||
id
|
||||
}
|
||||
|
||||
/// Retrieves the UUID for a given key.
|
||||
pub fn get_id(&self, key: &str) -> Option<&Uuid> {
|
||||
self.key_to_id.get(key)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&LLMKnowledgeEntity> for KnowledgeEntity {
|
||||
fn from(llm_entity: &LLMKnowledgeEntity) -> Self {
|
||||
KnowledgeEntity {
|
||||
id: Uuid::new_v4(),
|
||||
name: llm_entity.name.clone(),
|
||||
description: llm_entity.description.clone(),
|
||||
entity_type: KnowledgeEntityType::from(llm_entity.entity_type.clone()),
|
||||
source_id: None, // To be linked externally if needed
|
||||
metadata: None, // Populate if metadata is provided
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&LLMRelationship> for Relationship {
|
||||
fn from(llm_rel: &LLMRelationship) -> Self {
|
||||
Relationship {
|
||||
id: Uuid::new_v4(),
|
||||
in_: Uuid::nil(), // Placeholder; to be set after mapping
|
||||
out: Uuid::nil(), // Placeholder; to be set after mapping
|
||||
relationship_type: llm_rel.type_.clone(),
|
||||
metadata: None, // Populate if metadata is provided
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
pub mod file_info;
|
||||
pub mod graph_entities;
|
||||
pub mod ingress_content;
|
||||
pub mod ingress_object;
|
||||
pub mod text_content;
|
||||
|
||||
@@ -74,7 +74,7 @@ impl TextContent {
|
||||
|
||||
// Step 1: Send to LLM for analysis
|
||||
let analysis = create_json_ld(&self.category, &self.instructions, &self.text).await?;
|
||||
info!("{:?}", &analysis);
|
||||
info!("{:#?}", &analysis);
|
||||
|
||||
// Step 2: Store analysis results in Graph DB
|
||||
// client.store_knowledge_source(&analysis.knowledge_source).await?;
|
||||
|
||||
121
src/utils/llm.rs
121
src/utils/llm.rs
@@ -1,44 +1,79 @@
|
||||
use async_openai::types::ChatCompletionRequestSystemMessage;
|
||||
use async_openai::types::ChatCompletionRequestUserMessage;
|
||||
use async_openai::types::CreateChatCompletionRequestArgs;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use tracing::debug;
|
||||
use crate::models::text_content::ProcessingError;
|
||||
use serde_json::json;
|
||||
use crate::models::text_content::AnalysisResult;
|
||||
|
||||
/// Represents a single knowledge entity from the LLM.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct LLMKnowledgeEntity {
|
||||
pub key: String, // Temporary identifier
|
||||
pub name: String,
|
||||
pub description: String,
|
||||
pub entity_type: String, // Should match KnowledgeEntityType variants
|
||||
}
|
||||
|
||||
/// Represents a single relationship from the LLM.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct LLMRelationship {
|
||||
#[serde(rename = "type")]
|
||||
pub type_: String, // e.g., RelatedTo, RelevantTo
|
||||
pub source: String, // Key of the source entity
|
||||
pub target: String, // Key of the target entity
|
||||
}
|
||||
|
||||
/// Represents the entire graph analysis result from the LLM.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct LLMGraphAnalysisResult {
|
||||
pub knowledge_entities: Vec<LLMKnowledgeEntity>,
|
||||
pub relationships: Vec<LLMRelationship>,
|
||||
}
|
||||
|
||||
/// Sends text to an LLM for analysis.
|
||||
pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> Result<AnalysisResult, ProcessingError> {
|
||||
pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> Result<LLMGraphAnalysisResult, ProcessingError> {
|
||||
let client = async_openai::Client::new();
|
||||
let schema = json!({
|
||||
let schema = json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"knowledge_source": {
|
||||
"type": "object",
|
||||
"knowledge_entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {"type": "string"},
|
||||
"type": {"type": "string", "enum": ["Document", "Page", "TextSnippet"]},
|
||||
"title": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
"relationships": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {"type": "string", "enum": ["RelatedTo", "RelevantTo", "SimilarTo"]},
|
||||
"target": {"type": "string", "description": "ID of the related knowledge source"}
|
||||
},
|
||||
"required": ["type", "target"],
|
||||
"additionalProperties": false,
|
||||
}
|
||||
"key": { "type": "string" },
|
||||
"name": { "type": "string" },
|
||||
"description": { "type": "string" },
|
||||
"entity_type": {
|
||||
"type": "string",
|
||||
"enum": ["idea", "project", "document", "page", "textsnippet"]
|
||||
}
|
||||
},
|
||||
"required": ["id", "type", "title", "description", "relationships"],
|
||||
"additionalProperties": false,
|
||||
"required": ["key", "name", "description", "entity_type"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"category": {"type": "string"},
|
||||
"instructions": {"type": "string"}
|
||||
"relationships": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": ["RelatedTo", "RelevantTo", "SimilarTo"]
|
||||
},
|
||||
"source": { "type": "string" },
|
||||
"target": { "type": "string" }
|
||||
},
|
||||
"required": ["type", "source", "target"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["knowledge_source", "category", "instructions"],
|
||||
"required": ["knowledge_entities", "relationships"],
|
||||
"additionalProperties": false
|
||||
});
|
||||
|
||||
@@ -52,7 +87,39 @@ pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> R
|
||||
};
|
||||
|
||||
// Construct the system and user messages
|
||||
let system_message = "You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON-LD object representing the content, a moderately short description of the document, how it relates to the submitted category and any relevant instructions. You shall also include related objects. The goal is to insert your output into a graph database.".to_string();
|
||||
let system_message = r#"
|
||||
You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database.
|
||||
|
||||
The JSON should have the following structure:
|
||||
|
||||
{
|
||||
"knowledge_entities": [
|
||||
{
|
||||
"key": "unique-key-1",
|
||||
"name": "Entity Name",
|
||||
"description": "A detailed description of the entity.",
|
||||
"entity_type": "TypeOfEntity"
|
||||
},
|
||||
// More entities...
|
||||
],
|
||||
"relationships": [
|
||||
{
|
||||
"type": "RelationshipType",
|
||||
"source": "unique-key-1",
|
||||
"target": "unique-key-2"
|
||||
},
|
||||
// More relationships...
|
||||
]
|
||||
}
|
||||
|
||||
Guidelines:
|
||||
1. Do NOT generate any IDs or UUIDs. Use a unique `key` for each knowledge entity.
|
||||
2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`.
|
||||
3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.
|
||||
4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
|
||||
5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity.
|
||||
6. Optionally, add any relevant metadata within each object as needed."#;
|
||||
|
||||
|
||||
let user_message = format!(
|
||||
"Category: {}\nInstructions: {}\nContent:\n{}",
|
||||
@@ -80,9 +147,9 @@ pub async fn create_json_ld(category: &str, instructions: &str, text: &str) -> R
|
||||
// Extract and parse the response
|
||||
for choice in response.choices {
|
||||
if let Some(content) = choice.message.content {
|
||||
let analysis: AnalysisResult = serde_json::from_str(&content).map_err(|e| {
|
||||
let analysis: LLMGraphAnalysisResult = serde_json::from_str(&content).map_err(|e| {
|
||||
ProcessingError::LLMError(format!(
|
||||
"Failed to parse LLM response into LLMAnalysis: {}",
|
||||
"Failed to parse LLM response into analysis: {}",
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
Reference in New Issue
Block a user