refactoring: ingress_analyser and cleaning

2026-07-04 12:01:48 +02:00 · 2024-11-21 20:26:59 +01:00
parent 22abd3d731
commit 94f328e542
13 changed files with 420 additions and 375 deletions
@@ -1,61 +0,0 @@
 # SurrealDB only
 Right now we have the FileInfo stored in "files"
 - Change the uuid to the Uuid type, and have the database layer still use String. This means parsing and unparsing, but that's fine.
 ```
 pub struct FileInfo {
    pub uuid: String,
    pub sha256: String,
    pub path: String,
    pub mime_type: String,
 }
 ```
 We create TextContent objects, which we should store?
 - We store the "snippets" along with the vectors, but it would make sense to store the whole TextContent, at least for files that are not enormous?
 ```
 pub struct TextContent {
    pub id: Uuid,
    pub text: String,
    pub file_info: Option<FileInfo>,
    pub instructions: String,
    pub category: String,
 }
 ```
 We create KnowledgeSource, which we will store as a node, and its relationship as edges
 - Add a uuid so we can link the TextContent and files to the knowledge sources?
 - Rename id to name, and use it as the id? Id represents the TextContent link, from which several knowledge entities can be gathered.
 ```
 pub struct KnowledgeSource {
    pub name: String,
    pub id: Uuid,
    pub title: String,
    pub description: String,
    pub relationships: Vec<Relationship>,
 }
 ```
 We will create embeddings and vector representations of TextContent, possibly split up and store in vector DB
 ```
 pub struct VectorEmbeddingOfTextContent {
    pub id: Uuid,
    pub vectors: Vec<u8>(or something),
    pub text_content: String,
    pub category: String,
 }
 ```
 ______________________________________________________________________
 ## Goals
 - Smooth operations when updating, removing and adding data
 - Smooth queries where one can search, get a vector snippet, which links to a graph node and its edges, and also the fulltext document.
@@ -0,0 +1,147 @@
 use crate::{
    analysis::ingress::{
        prompt::{get_ingress_analysis_schema, INGRESS_ANALYSIS_SYSTEM_MESSAGE},
        types::llm_analysis_result::LLMGraphAnalysisResult,
    },
    error::ProcessingError,
    retrieval::vector::find_items_by_vector_similarity,
    storage::types::{knowledge_entity::KnowledgeEntity, StoredObject},
 };
 use async_openai::types::{
    ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage,
    CreateChatCompletionRequest, CreateChatCompletionRequestArgs, ResponseFormat,
    ResponseFormatJsonSchema,
 };
 use serde_json::json;
 use surrealdb::engine::remote::ws::Client;
 use surrealdb::Surreal;
 use tracing::{debug, instrument};
 /// Runs LLM-based graph analysis on ingested content.
 ///
 /// Holds borrowed handles to the SurrealDB connection and the OpenAI client; the
 /// lifetime `'a` ties the analyzer to both borrows.
 pub struct IngressAnalyzer<'a> {
    /// Database connection handed to the vector-similarity lookup.
    db_client: &'a Surreal<Client>,
    /// OpenAI client used for the similarity lookup and the chat completion call.
    openai_client: &'a async_openai::Client<async_openai::config::OpenAIConfig>,
 }
 impl<'a> IngressAnalyzer<'a> {
    /// Creates an analyzer that borrows the given database and OpenAI clients.
    pub fn new(
        db_client: &'a Surreal<Client>,
        openai_client: &'a async_openai::Client<async_openai::config::OpenAIConfig>,
    ) -> Self {
        Self {
            db_client,
            openai_client,
        }
    }
    /// Analyses a piece of content and returns the graph structure proposed by the LLM.
    ///
    /// Looks up similar existing entities, builds a chat completion request that includes
    /// them as context, sends it, and parses the reply.
    ///
    /// # Errors
    ///
    /// Returns a `ProcessingError` if the similarity lookup fails, the request cannot be
    /// built, the OpenAI call fails, or the reply is missing or cannot be deserialised into
    /// an `LLMGraphAnalysisResult`.
    #[instrument(skip(self))]
    pub async fn analyze_content(
        &self,
        category: &str,
        instructions: &str,
        text: &str,
    ) -> Result<LLMGraphAnalysisResult, ProcessingError> {
        let similar_entities = self
            .find_similar_entities(category, instructions, text)
            .await?;
        let llm_request =
            self.prepare_llm_request(category, instructions, text, &similar_entities)?;
        self.perform_analysis(llm_request).await
    }
    /// Fetches up to 10 existing `KnowledgeEntity` rows by vector similarity to the
    /// combined content, category and instructions.
    #[instrument(skip(self))]
    async fn find_similar_entities(
        &self,
        category: &str,
        instructions: &str,
        text: &str,
    ) -> Result<Vec<KnowledgeEntity>, ProcessingError> {
        // All three inputs are folded into one string for the similarity search.
        let input_text = format!(
            "content: {}, category: {}, user_instructions: {}",
            text, category, instructions
        );
        find_items_by_vector_similarity(
            10,
            input_text,
            self.db_client,
            KnowledgeEntity::table_name().to_string(),
            self.openai_client,
        )
        .await
    }
    /// Builds the chat completion request: system prompt, user message containing the
    /// content plus the similar entities, and the JSON-schema response format.
    ///
    /// # Errors
    ///
    /// Returns `ProcessingError::LLMParsingError` if the request builder fails.
    #[instrument(skip(self))]
    fn prepare_llm_request(
        &self,
        category: &str,
        instructions: &str,
        text: &str,
        similar_entities: &[KnowledgeEntity],
    ) -> Result<CreateChatCompletionRequest, ProcessingError> {
        // Only id, name and description are forwarded to the model.
        let entities_json = json!(similar_entities
            .iter()
            .map(|entity| {
                json!({
                    "KnowledgeEntity": {
                        "id": entity.id,
                        "name": entity.name,
                        "description": entity.description
                    }
                })
            })
            .collect::<Vec<_>>());
        let user_message = format!(
            "Category:\n{}\nInstructions:\n{}\nContent:\n{}\nExisting KnowledgeEntities in database:\n{}",
            category, instructions, text, entities_json
        );
        debug!("Prepared LLM request message: {}", user_message);
        let response_format = ResponseFormat::JsonSchema {
            json_schema: ResponseFormatJsonSchema {
                description: Some("Structured analysis of the submitted content".into()),
                name: "content_analysis".into(),
                schema: Some(get_ingress_analysis_schema()),
                strict: Some(true),
            },
        };
        CreateChatCompletionRequestArgs::default()
            // `gpt-4-mini` is not a valid model id; `gpt-4o-mini` is, and it supports the
            // `json_schema` response format used here.
            .model("gpt-4o-mini")
            .temperature(0.2)
            .max_tokens(2048u32)
            .messages([
                ChatCompletionRequestSystemMessage::from(INGRESS_ANALYSIS_SYSTEM_MESSAGE).into(),
                ChatCompletionRequestUserMessage::from(user_message).into(),
            ])
            .response_format(response_format)
            .build()
            .map_err(|e| ProcessingError::LLMParsingError(e.to_string()))
    }
    /// Sends the request to OpenAI and deserialises the first choice's content.
    ///
    /// # Errors
    ///
    /// Propagates the OpenAI client error, and returns `ProcessingError::LLMParsingError`
    /// when the response has no content or the content does not parse.
    #[instrument(skip(self, request))]
    async fn perform_analysis(
        &self,
        request: CreateChatCompletionRequest,
    ) -> Result<LLMGraphAnalysisResult, ProcessingError> {
        let response = self.openai_client.chat().create(request).await?;
        debug!("Received LLM response: {:?}", response);
        // Build the error lazily so the success path does not allocate it.
        let content = response
            .choices
            .first()
            .and_then(|choice| choice.message.content.as_ref())
            .ok_or_else(|| {
                ProcessingError::LLMParsingError("No content found in LLM response".into())
            })?;
        serde_json::from_str(content).map_err(|e| {
            ProcessingError::LLMParsingError(format!(
                "Failed to parse LLM response into analysis: {}",
                e
            ))
        })
    }
 }
@@ -0,0 +1,3 @@
 pub mod ingress_analyser;
 pub mod prompt;
 pub mod types;
@@ -0,0 +1,81 @@
 use serde_json::{json, Value};
 /// System prompt for ingress analysis requests.
 ///
 /// It instructs the model to emit a JSON object with `knowledge_entities` and
 /// `relationships` arrays, to identify new entities by a temporary `key`, and to refer to
 /// existing entities by their UUID.
 ///
 /// NOTE(review): guideline 5 ends with a stray `"`; guideline 7 ("Only create relationships
 /// between existing KnowledgeEntities") reads as conflicting with guideline 9; and guideline 3
 /// lists capitalised type names while the schema enum is lowercase. Confirm the intended
 /// wording before editing — any change alters what the model receives.
 pub static INGRESS_ANALYSIS_SYSTEM_MESSAGE: &str = r#"
            You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities from the database, do not replicate these!
            The JSON should have the following structure:
            {
                "knowledge_entities": [
                    {
                        "key": "unique-key-1",
                        "name": "Entity Name",
                        "description": "A detailed description of the entity.",
                        "entity_type": "TypeOfEntity"
                    },
                    // More entities...
                ],
                "relationships": [
                    {
                        "type": "RelationshipType",
                        "source": "unique-key-1 or UUID from existing database",
                        "target": "unique-key-1 or UUID from existing database"
                    },
                    // More relationships...
                ]
            }
            Guidelines:
            1. Do NOT generate any IDs or UUIDs. Use a unique `key` for each knowledge entity.
            2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`.
            3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.
            4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
            5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity"
            6. You will be presented with a few existing KnowledgeEntities that are similar to the current ones. They will have an existing UUID. When creating relationships to these entities, use their UUID.
            7. Only create relationships between existing KnowledgeEntities.
            8. Entities that exist already in the database should NOT be created again. If there is only a minor overlap, skip creating a new entity.
            9. A new relationship MUST include a newly created KnowledgeEntity.
            "#;
 /// Builds the JSON Schema describing the expected ingress analysis output.
 ///
 /// The root object requires two arrays, `knowledge_entities` and `relationships`, and
 /// forbids additional properties at every level.
 pub fn get_ingress_analysis_schema() -> Value {
    // Shape of a single element of `knowledge_entities`.
    let entity_item = json!({
      "type": "object",
      "properties": {
        "key": { "type": "string" },
        "name": { "type": "string" },
        "description": { "type": "string" },
        "entity_type": {
          "type": "string",
          "enum": ["idea", "project", "document", "page", "textsnippet"]
        }
      },
      "required": ["key", "name", "description", "entity_type"],
      "additionalProperties": false
    });
    // Shape of a single element of `relationships`.
    let relationship_item = json!({
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "enum": ["RelatedTo", "RelevantTo", "SimilarTo"]
        },
        "source": { "type": "string" },
        "target": { "type": "string" }
      },
      "required": ["type", "source", "target"],
      "additionalProperties": false
    });
    json!({
      "type": "object",
      "properties": {
        "knowledge_entities": {
          "type": "array",
          "items": entity_item
        },
        "relationships": {
          "type": "array",
          "items": relationship_item
        }
      },
      "required": ["knowledge_entities", "relationships"],
      "additionalProperties": false
    })
 }
@@ -0,0 +1,174 @@
 use std::sync::{Arc, Mutex};
 use serde::{Deserialize, Serialize};
 use tokio::task;
 use crate::{
    error::ProcessingError,
    models::graph_entities::GraphMapper,
    storage::types::{
        knowledge_entity::{KnowledgeEntity, KnowledgeEntityType},
        knowledge_relationship::KnowledgeRelationship,
    },
    utils::embedding::generate_embedding,
 };
 use futures::future::try_join_all; // For future parallelization
 /// Represents a single knowledge entity as returned by the LLM.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct LLMKnowledgeEntity {
    /// Temporary identifier chosen by the LLM.
    pub key: String,
    /// Name of the entity.
    pub name: String,
    /// Description of the entity.
    pub description: String,
    /// Entity type as a string; should match `KnowledgeEntityType` variants.
    pub entity_type: String,
 }
 /// Represents a single relationship from the LLM.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct LLMRelationship {
    /// Relationship type, e.g. RelatedTo, RelevantTo. Serialised under the JSON key `type`.
    #[serde(rename = "type")]
    pub type_: String,
    /// Key of the source entity.
    pub source: String,
    /// Key of the target entity.
    pub target: String,
 }
 /// Represents the entire graph analysis result from the LLM.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct LLMGraphAnalysisResult {
    /// Entities proposed by the LLM.
    pub knowledge_entities: Vec<LLMKnowledgeEntity>,
    /// Relationships proposed by the LLM.
    pub relationships: Vec<LLMRelationship>,
 }
 impl LLMGraphAnalysisResult {
    /// Converts the LLM graph analysis result into database entities and relationships.
    ///
    /// IDs are assigned to every entity key up front, then one task per entity is spawned
    /// to build it (including its embedding), and finally relationships are resolved
    /// through the same mapper.
    ///
    /// # Arguments
    ///
    /// * `source_id` - Identifier of the source; copied into each entity's `source_id`.
    /// * `openai_client` - OpenAI client used to generate embeddings.
    ///
    /// # Returns
    ///
    /// A tuple containing vectors of `KnowledgeEntity` and `KnowledgeRelationship`.
    ///
    /// # Errors
    ///
    /// Returns a `ProcessingError` if a spawned task fails to join, the mapper lock cannot
    /// be acquired, an entity key has no assigned ID, or embedding generation fails.
    pub async fn to_database_entities(
        &self,
        source_id: &str,
        openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
    ) -> Result<(Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>), ProcessingError> {
        let mapper = Arc::new(Mutex::new(self.create_mapper()));
        let entities = self
            .process_entities(source_id, Arc::clone(&mapper), openai_client)
            .await?;
        // Last use of the mapper: move the handle instead of cloning it again.
        let relationships = self.process_relationships(mapper)?;
        Ok((entities, relationships))
    }
    /// Creates a mapper with an ID pre-assigned for every entity key.
    fn create_mapper(&self) -> GraphMapper {
        let mut mapper = GraphMapper::new();
        for entity in &self.knowledge_entities {
            mapper.assign_id(&entity.key);
        }
        mapper
    }
    /// Spawns one task per entity and collects the resulting `KnowledgeEntity` values.
    async fn process_entities(
        &self,
        source_id: &str,
        mapper: Arc<Mutex<GraphMapper>>,
        openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
    ) -> Result<Vec<KnowledgeEntity>, ProcessingError> {
        let handles: Vec<_> = self
            .knowledge_entities
            .iter()
            .map(|entity| {
                // Each spawned task owns its own copies of everything it needs.
                let mapper = Arc::clone(&mapper);
                let openai_client = openai_client.clone();
                let source_id = source_id.to_string();
                let entity = entity.clone();
                task::spawn(async move {
                    create_single_entity(&entity, &source_id, mapper, &openai_client).await
                })
            })
            .collect();
        // The outer `?` surfaces join failures; the collect surfaces per-entity errors.
        try_join_all(handles).await?.into_iter().collect()
    }
    /// Resolves every LLM relationship endpoint through the mapper and builds the
    /// corresponding `KnowledgeRelationship` values.
    fn process_relationships(
        &self,
        mapper: Arc<Mutex<GraphMapper>>,
    ) -> Result<Vec<KnowledgeRelationship>, ProcessingError> {
        let mut mapper_guard = mapper
            .lock()
            .map_err(|_| ProcessingError::GraphProcessingError("Failed to lock mapper".into()))?;
        let relationships = self
            .relationships
            .iter()
            .map(|rel| {
                let source_db_id = mapper_guard.get_or_parse_id(&rel.source);
                let target_db_id = mapper_guard.get_or_parse_id(&rel.target);
                KnowledgeRelationship::new(
                    source_db_id.to_string(),
                    target_db_id.to_string(),
                    rel.type_.clone(),
                    None,
                )
            })
            .collect();
        Ok(relationships)
    }
 }
 async fn create_single_entity(
    llm_entity: &LLMKnowledgeEntity,
    source_id: &str,
    mapper: Arc<Mutex<GraphMapper>>,
    openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
 ) -> Result<KnowledgeEntity, ProcessingError> {
    let assigned_id = {
        let mapper = mapper
            .lock()
            .map_err(|_| ProcessingError::GraphProcessingError("Failed to lock mapper".into()))?;
        mapper
            .get_id(&llm_entity.key)
            .ok_or_else(|| {
                ProcessingError::GraphProcessingError(format!(
                    "ID not found for key: {}",
                    llm_entity.key
                ))
            })?
            .to_string()
    };
    let embedding_input = format!(
        "name: {}, description: {}, type: {}",
        llm_entity.name, llm_entity.description, llm_entity.entity_type
    );
    let embedding = generate_embedding(openai_client, embedding_input).await?;
    Ok(KnowledgeEntity {
        id: assigned_id,
        name: llm_entity.name.to_string(),
        description: llm_entity.description.to_string(),
        entity_type: KnowledgeEntityType::from(llm_entity.entity_type.to_string()),
        source_id: source_id.to_string(),
        metadata: None,
        embedding,
    })
 }
@@ -0,0 +1 @@
 pub mod llm_analysis_result;
@@ -0,0 +1 @@
 pub mod ingress;
@@ -1,5 +1,6 @@
 use async_openai::error::OpenAIError;
 use thiserror::Error;
 use tokio::task::JoinError;
 /// Error types for processing `TextContent`.
 #[derive(Error, Debug)]
@@ -18,4 +19,7 @@ pub enum ProcessingError {
    #[error("LLM parsing error: {0}")]
    LLMParsingError(String),
    #[error("Task join error: {0}")]
    JoinError(#[from] JoinError),
 }
@@ -1,3 +1,4 @@
 pub mod analysis;
 pub mod error;
 pub mod models;
 pub mod rabbitmq;
@@ -2,7 +2,6 @@ use super::ingress_content::IngressContentError;
 use crate::models::file_info::FileInfo;
 use crate::storage::types::text_content::TextContent;
 use serde::{Deserialize, Serialize};
 use uuid::Uuid;
 /// Knowledge object type, containing the content or reference to it, as well as metadata
 #[derive(Debug, Serialize, Deserialize, Clone)]
@@ -1,3 +1,4 @@
 use crate::analysis::ingress::ingress_analyser::IngressAnalyzer;
 use crate::retrieval::graph::find_entities_by_source_id;
 use crate::retrieval::vector::find_items_by_vector_similarity;
 use crate::storage::db::store_item;
@@ -7,7 +8,7 @@ use crate::storage::types::text_chunk::TextChunk;
 use crate::storage::types::text_content::TextContent;
 use crate::storage::types::StoredObject;
 use crate::utils::embedding::generate_embedding;
-use crate::{error::ProcessingError, surrealdb::SurrealDbClient, utils::llm::create_json_ld};
+use crate::{error::ProcessingError, surrealdb::SurrealDbClient};
 use surrealdb::{engine::remote::ws::Client, Surreal};
 use text_splitter::TextSplitter;
 use tracing::{debug, info};
@@ -19,8 +20,7 @@ impl TextContent {
        let openai_client = async_openai::Client::new();
        // Store TextContent
-        let create_operation = store_item(&db_client, self.clone()).await?;
+        store_item(&db_client, self.clone()).await?;
        info!("{:?}", create_operation);
        // Get related nodes
        let closest_text_content: Vec<TextChunk> = find_items_by_vector_similarity(
@@ -48,15 +48,10 @@ impl TextContent {
        db_client.rebuild_indexes().await?;
        // Step 1: Send to LLM for analysis
-        let analysis = create_json_ld(
+        let analyser = IngressAnalyzer::new(&db_client, &openai_client);
-            &self.category,
+        let analysis = analyser
-            &self.instructions,
+            .analyze_content(&self.category, &self.instructions, &self.text)
-            &self.text,
+            .await?;
            &db_client,
            &openai_client,
        )
        .await?;
        // info!("{:#?}", &analysis);
        // Step 2: Convert LLM analysis to database entities
        let (entities, relationships) = analysis
@@ -116,7 +111,7 @@ impl TextContent {
        for chunk in chunks {
            info!("Chunk: {}", chunk);
-            let embedding = generate_embedding(&openai_client, chunk.to_string()).await?;
+            let embedding = generate_embedding(openai_client, chunk.to_string()).await?;
            let text_chunk = TextChunk::new(self.id.to_string(), chunk.to_string(), embedding);
            store_item(db_client, text_chunk).await?;
@@ -1,299 +0,0 @@
 use crate::{
    error::ProcessingError,
    models::graph_entities::GraphMapper,
    retrieval::vector::find_items_by_vector_similarity,
    storage::types::{
        knowledge_entity::{KnowledgeEntity, KnowledgeEntityType},
        knowledge_relationship::KnowledgeRelationship,
        StoredObject,
    },
 };
 use async_openai::types::{
    ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage,
    CreateChatCompletionRequestArgs,
 };
 use serde::{Deserialize, Serialize};
 use serde_json::json;
 use surrealdb::engine::remote::ws::Client;
 use surrealdb::Surreal;
 use tracing::debug;
 use uuid::Uuid;
 use super::embedding::generate_embedding;
 /// Represents a single knowledge entity from the LLM.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct LLMKnowledgeEntity {
    /// Temporary identifier chosen by the LLM.
    pub key: String,
    /// Name of the entity.
    pub name: String,
    /// Description of the entity.
    pub description: String,
    /// Entity type as a string; should match `KnowledgeEntityType` variants.
    pub entity_type: String,
 }
 /// Represents a single relationship from the LLM.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct LLMRelationship {
    /// Relationship type, e.g. RelatedTo, RelevantTo. Serialised under the JSON key `type`.
    #[serde(rename = "type")]
    pub type_: String,
    /// Key of the source entity.
    pub source: String,
    /// Key of the target entity.
    pub target: String,
 }
 /// Represents the entire graph analysis result from the LLM.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct LLMGraphAnalysisResult {
    /// Entities proposed by the LLM.
    pub knowledge_entities: Vec<LLMKnowledgeEntity>,
    /// Relationships proposed by the LLM.
    pub relationships: Vec<LLMRelationship>,
 }
 impl LLMGraphAnalysisResult {
    /// Converts the LLM graph analysis result into database entities and relationships.
    /// Processes embeddings sequentially for simplicity.
    ///
    /// # Arguments
    ///
    /// * `source_id` - Identifier of the source; copied into each entity's `source_id`.
    /// * `openai_client` - OpenAI client used to generate embeddings.
    ///
    /// # Returns
    ///
    /// * `Result<(Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>), ProcessingError>` - A tuple containing vectors of `KnowledgeEntity` and `KnowledgeRelationship`.
    ///
    /// # Errors
    ///
    /// Returns `ProcessingError::GraphProcessingError` if an entity key has no assigned ID,
    /// and propagates any error from `generate_embedding`.
    pub async fn to_database_entities(
        &self,
        source_id: &str,
        openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
    ) -> Result<(Vec<KnowledgeEntity>, Vec<KnowledgeRelationship>), ProcessingError> {
        let mut mapper = GraphMapper::new();
        // Step 1: Assign unique IDs to all knowledge entities upfront
        for llm_entity in &self.knowledge_entities {
            mapper.assign_id(&llm_entity.key);
        }
        let mut entities = Vec::with_capacity(self.knowledge_entities.len());
        // Step 2: Process each knowledge entity sequentially
        for llm_entity in &self.knowledge_entities {
            // Retrieve the assigned ID for the current entity
            let assigned_id = mapper
                .get_id(&llm_entity.key)
                .ok_or_else(|| {
                    ProcessingError::GraphProcessingError(format!(
                        "ID not found for key: {}",
                        llm_entity.key
                    ))
                })?
                .to_string();
            // Prepare the embedding input
            let embedding_input = format!(
                "name: {}, description: {}, type: {}",
                llm_entity.name, llm_entity.description, llm_entity.entity_type
            );
            // Generate embedding; `openai_client` is already a reference, so pass it through as is
            let embedding = generate_embedding(openai_client, embedding_input).await?;
            // Construct the KnowledgeEntity with embedding
            entities.push(KnowledgeEntity {
                id: assigned_id,
                name: llm_entity.name.clone(),
                description: llm_entity.description.clone(),
                entity_type: KnowledgeEntityType::from(llm_entity.entity_type.clone()),
                source_id: source_id.to_string(),
                metadata: None,
                embedding,
            });
        }
        // Step 3: Process relationships using the pre-assigned IDs.
        // Every relationship is kept, so a plain `map` replaces the always-`Some` `filter_map`.
        let relationships: Vec<KnowledgeRelationship> = self
            .relationships
            .iter()
            .map(|llm_rel| {
                let source_db_id = mapper.get_or_parse_id(&llm_rel.source);
                let target_db_id = mapper.get_or_parse_id(&llm_rel.target);
                debug!("IN: {}, OUT: {}", &source_db_id, &target_db_id);
                KnowledgeRelationship::new(
                    source_db_id.to_string(),
                    target_db_id.to_string(),
                    llm_rel.type_.clone(),
                    None,
                )
            })
            .collect();
        Ok((entities, relationships))
    }
 }
 /// Sends text to an LLM for analysis.
 ///
 /// Looks up the 10 most similar existing `KnowledgeEntity` rows, passes them to the model
 /// together with the category, instructions and content, and deserialises the reply into
 /// an `LLMGraphAnalysisResult`.
 ///
 /// # Errors
 ///
 /// Propagates errors from the similarity lookup, the request builder and the OpenAI call,
 /// and returns `ProcessingError::LLMParsingError` when the response has no content or the
 /// content does not parse.
 pub async fn create_json_ld(
    category: &str,
    instructions: &str,
    text: &str,
    db_client: &Surreal<Client>,
    openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
 ) -> Result<LLMGraphAnalysisResult, ProcessingError> {
    // Format the input for more cohesive comparison
    let input_text = format!(
        "content: {}, category: {}, user_instructions: {}",
        text, category, instructions
    );
    let closest_entities: Vec<KnowledgeEntity> = find_items_by_vector_similarity(
        10,
        input_text,
        db_client,
        KnowledgeEntity::table_name().to_string(),
        openai_client,
    )
    .await?;
    // Format the KnowledgeEntity, removing redundant fields: only id, name and description are sent
    let closest_entities_to_llm = json!(closest_entities
        .iter()
        .map(|entity| {
            json!({
                "KnowledgeEntity": {
                    "id": entity.id,
                    "name": entity.name,
                    "description": entity.description
                }
            })
        })
        .collect::<Vec<_>>());
    // JSON Schema the model's reply must conform to
    let schema = json!({
      "type": "object",
      "properties": {
        "knowledge_entities": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "key": { "type": "string" },
              "name": { "type": "string" },
              "description": { "type": "string" },
              "entity_type": {
                "type": "string",
                "enum": ["idea", "project", "document", "page", "textsnippet"]
              }
            },
            "required": ["key", "name", "description", "entity_type"],
            "additionalProperties": false
          }
        },
        "relationships": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "type": {
                "type": "string",
                "enum": ["RelatedTo", "RelevantTo", "SimilarTo"]
              },
              "source": { "type": "string" },
              "target": { "type": "string" }
            },
            "required": ["type", "source", "target"],
            "additionalProperties": false
          }
        }
      },
      "required": ["knowledge_entities", "relationships"],
      "additionalProperties": false
    });
    let response_format = async_openai::types::ResponseFormat::JsonSchema {
        json_schema: async_openai::types::ResponseFormatJsonSchema {
            description: Some("Structured analysis of the submitted content".into()),
            name: "content_analysis".into(),
            schema: Some(schema),
            strict: Some(true),
        },
    };
    // Construct the system and user messages
    let system_message = r#"
            You are an expert document analyzer. You will receive a document's text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities from the database, do not replicate these!
            The JSON should have the following structure:
            {
                "knowledge_entities": [
                    {
                        "key": "unique-key-1",
                        "name": "Entity Name",
                        "description": "A detailed description of the entity.",
                        "entity_type": "TypeOfEntity"
                    },
                    // More entities...
                ],
                "relationships": [
                    {
                        "type": "RelationshipType",
                        "source": "unique-key-1 or UUID from existing database",
                        "target": "unique-key-1 or UUID from existing database"
                    },
                    // More relationships...
                ]
            }
            Guidelines:
            1. Do NOT generate any IDs or UUIDs. Use a unique `key` for each knowledge entity.
            2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`.
            3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.
            4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
            5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity"
            6. You will be presented with a few existing KnowledgeEntities that are similar to the current ones. They will have an existing UUID. When creating relationships to these entities, use their UUID.
            7. Only create relationships between existing KnowledgeEntities.
            8. Entities that exist already in the database should NOT be created again. If there is only a minor overlap, skip creating a new entity.
            9. A new relationship MUST include a newly created KnowledgeEntity.
            "#;
    let user_message = format!(
        "Category:\n{}\nInstructions:\n{}\nContent:\n{}\nExisting KnowledgeEntities in database:\n{}",
        category, instructions, text, closest_entities_to_llm
    );
    debug!("{}", user_message);
    // Build the chat completion request
    let request = CreateChatCompletionRequestArgs::default()
        .model("gpt-4o-mini")
        .temperature(0.2)
        .max_tokens(2048u32)
        .messages([
            ChatCompletionRequestSystemMessage::from(system_message).into(),
            ChatCompletionRequestUserMessage::from(user_message).into(),
        ])
        .response_format(response_format)
        .build()?;
    // Send the request to OpenAI
    let response = openai_client.chat().create(request).await?;
    debug!("{:?}", response);
    // Only the first choice is read; its content is expected to be the JSON document
    response
        .choices
        .first()
        .and_then(|choice| choice.message.content.as_ref())
        .ok_or(ProcessingError::LLMParsingError(
            "No content found in LLM response".into(),
        ))
        .and_then(|content| {
            serde_json::from_str(content).map_err(|e| {
                ProcessingError::LLMParsingError(format!(
                    "Failed to parse LLM response into analysis: {}",
                    e
                ))
            })
        })
 }
@@ -1,2 +1 @@
 pub mod embedding;
 pub mod llm;
`@@ -1,2 +1 @@`
	`pub mod embedding;`	`pub mod embedding;`
	`pub mod llm;`