comments, llm schema

This commit is contained in:
Per Stark
2024-10-01 19:01:29 +02:00
parent dcb82ca454
commit 779b32f807
15 changed files with 442 additions and 72 deletions

View File

@@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize};
use super::{ingress_content::IngressContentError, text_content::TextContent};
/// Knowledge object type, containing the content or reference to it, as well as metadata
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum IngressObject {
Url {
@@ -21,7 +22,15 @@ pub enum IngressObject {
category: String,
},
}
impl IngressObject {
/// Creates a new `TextContent` instance from an `IngressObject`.
///
/// # Arguments
/// `&self` - A reference to the `IngressObject`.
///
/// # Returns
/// `Result<TextContent, IngressContentError>` - On success, a `TextContent` containing a text representation of the object (e.g. a scraped URL or a parsed PDF); otherwise an `IngressContentError`.
pub async fn to_text_content(&self) -> Result<TextContent, IngressContentError> {
match self {
IngressObject::Url { url, instructions, category } => {
@@ -66,6 +75,11 @@ impl IngressObject {
let content = tokio::fs::read_to_string(&file_info.path).await?;
Ok(content)
}
"text/markdown" => {
// Read the file and return its content
let content = tokio::fs::read_to_string(&file_info.path).await?;
Ok(content)
}
"application/pdf" => {
// TODO: Implement PDF text extraction using a crate like `pdf-extract` or `lopdf`
Err(IngressContentError::UnsupportedMime(file_info.mime_type.clone()))