mirror of
https://github.com/perstarkse/minne.git
synced 2026-03-24 18:31:45 +01:00
refactor: renamed instructions to context
This commit is contained in:
@@ -16,7 +16,7 @@ use crate::{api_state::ApiState, error::ApiError};
|
||||
#[derive(Debug, TryFromMultipart)]
|
||||
pub struct IngestParams {
|
||||
pub content: Option<String>,
|
||||
pub instructions: String,
|
||||
pub context: String,
|
||||
pub category: String,
|
||||
#[form_data(limit = "10000000")] // Adjust limit as needed
|
||||
#[form_data(default)]
|
||||
@@ -40,7 +40,7 @@ pub async fn ingest_data(
|
||||
|
||||
let payloads = IngestionPayload::create_ingestion_payload(
|
||||
input.content,
|
||||
input.instructions,
|
||||
input.context,
|
||||
input.category,
|
||||
file_infos,
|
||||
user.id.as_str(),
|
||||
|
||||
@@ -14,6 +14,6 @@ CREATE system_settings:current CONTENT {
|
||||
query_model: "gpt-4o-mini",
|
||||
processing_model: "gpt-4o-mini",
|
||||
query_system_prompt: "You are a knowledgeable assistant with access to a specialized knowledge base. You will be provided with relevant knowledge entities from the database as context. Each knowledge entity contains a name, description, and type, representing different concepts, ideas, and information.\nYour task is to:\n1. Carefully analyze the provided knowledge entities in the context\n2. Answer user questions based on this information\n3. Provide clear, concise, and accurate responses\n4. When referencing information, briefly mention which knowledge entity it came from\n5. If the provided context doesn't contain enough information to answer the question confidently, clearly state this\n6. If only partial information is available, explain what you can answer and what information is missing\n7. Avoid making assumptions or providing information not supported by the context\n8. Output the references to the documents. Use the UUIDs and make sure they are correct!\nRemember:\n- Be direct and honest about the limitations of your knowledge\n- Cite the relevant knowledge entities when providing information, but only provide the UUIDs in the reference array\n- If you need to combine information from multiple entities, explain how they connect\n- Don't speculate beyond what's provided in the context\nExample response formats:\n\"Based on [Entity Name], [answer...]\"\n\"I found relevant information in multiple entries: [explanation...]\"\n\"I apologize, but the provided context doesn't contain information about [topic]\"",
|
||||
ingestion_system_prompt: "You are an AI assistant. You will receive a text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities from the database, do not replicate these! Your task is to create meaningful knowledge entities from the submitted content. Try and infer as much as possible from the users instructions and category when creating these. If the user submits a large content, create more general entities. If the user submits a narrow and precise content, try and create precise knowledge entities.\nThe JSON should have the following structure:\n{\n\"knowledge_entities\": [\n{\n\"key\": \"unique-key-1\",\n\"name\": \"Entity Name\",\n\"description\": \"A detailed description of the entity.\",\n\"entity_type\": \"TypeOfEntity\"\n},\n// More entities...\n],\n\"relationships\": [\n{\n\"type\": \"RelationshipType\",\n\"source\": \"unique-key-1 or UUID from existing database\",\n\"target\": \"unique-key-1 or UUID from existing database\"\n},\n// More relationships...\n]\n}\nGuidelines:\n1. Do NOT generate any IDs or UUIDs. Use a unique `key` for each knowledge entity.\n2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`.\n3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.\n4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.\n5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity\"\n6. You will be presented with a few existing KnowledgeEntities that are similar to the current ones. They will have an existing UUID. When creating relationships to these entities, use their UUID.\n7. Only create relationships between existing KnowledgeEntities.\n8. 
Entities that exist already in the database should NOT be created again. If there is only a minor overlap, skip creating a new entity.\n9. A new relationship MUST include a newly created KnowledgeEntity."
|
||||
ingestion_system_prompt: "You are an AI assistant. You will receive a text content, along with user context and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities from the database, do not replicate these! Your task is to create meaningful knowledge entities from the submitted content. Try and infer as much as possible from the users context and category when creating these. If the user submits a large content, create more general entities. If the user submits a narrow and precise content, try and create precise knowledge entities.\nThe JSON should have the following structure:\n{\n\"knowledge_entities\": [\n{\n\"key\": \"unique-key-1\",\n\"name\": \"Entity Name\",\n\"description\": \"A detailed description of the entity.\",\n\"entity_type\": \"TypeOfEntity\"\n},\n// More entities...\n],\n\"relationships\": [\n{\n\"type\": \"RelationshipType\",\n\"source\": \"unique-key-1 or UUID from existing database\",\n\"target\": \"unique-key-1 or UUID from existing database\"\n},\n// More relationships...\n]\n}\nGuidelines:\n1. Do NOT generate any IDs or UUIDs. Use a unique `key` for each knowledge entity.\n2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`.\n3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.\n4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.\n5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity\"\n6. You will be presented with a few existing KnowledgeEntities that are similar to the current ones. They will have an existing UUID. When creating relationships to these entities, use their UUID.\n7. Only create relationships between existing KnowledgeEntities.\n8. 
Entities that exist already in the database should NOT be created again. If there is only a minor overlap, skip creating a new entity.\n9. A new relationship MUST include a newly created KnowledgeEntity."
|
||||
};
|
||||
END;
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
-- Migration: rename the `instructions` field on `text_content` to `context`.
-- The statements below are order-dependent: define the new field, relax the old
-- one, copy the data across, clear the old values, then drop the old definition.

-- Step 1: define the new `context` field as an optional string (no-op if it already exists).
DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;
|
||||
-- Step 2: redefine `instructions` as an optional string.
-- NOTE(review): presumably this is what allows the UNSET in step 4 to pass the
-- field's type check -- confirm against SurrealDB field-type semantics.
DEFINE FIELD OVERWRITE instructions ON text_content TYPE option<string>;
|
||||
|
||||
-- Step 3: copy the old value into `context` on every record where `instructions` is not NONE.
UPDATE text_content SET context = instructions WHERE instructions IS NOT NONE;
|
||||
|
||||
-- Step 4: clear `instructions` on every record.
UPDATE text_content UNSET instructions;
|
||||
|
||||
-- Step 5: remove the `instructions` field definition from the table.
REMOVE FIELD instructions ON TABLE text_content;
|
||||
@@ -0,0 +1 @@
|
||||
{"schemas":"--- original\n+++ modified\n@@ -198,11 +198,11 @@\n DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;\n # UrlInfo is a struct, store as object\n DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;\n-DEFINE FIELD IF NOT EXISTS instructions ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;\n DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;\n DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;\n\n-# Indexes based on query patterns (get_latest_text_contents, get_text_contents_by_category)\n+# Indexes based on query patterns\n DEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;\n DEFINE INDEX IF NOT EXISTS text_content_category_idx ON text_content FIELDS category;\n","events":null}
|
||||
@@ -12,11 +12,11 @@ DEFINE FIELD IF NOT EXISTS text ON text_content TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;
|
||||
# UrlInfo is a struct, store as object
|
||||
DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;
|
||||
DEFINE FIELD IF NOT EXISTS instructions ON text_content TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;
|
||||
DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;
|
||||
DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;
|
||||
|
||||
# Indexes based on query patterns (get_latest_text_contents, get_text_contents_by_category)
|
||||
# Indexes based on query patterns
|
||||
DEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;
|
||||
DEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;
|
||||
DEFINE INDEX IF NOT EXISTS text_content_category_idx ON text_content FIELDS category;
|
||||
|
||||
@@ -7,30 +7,30 @@ use url::Url;
|
||||
pub enum IngestionPayload {
|
||||
Url {
|
||||
url: String,
|
||||
instructions: String,
|
||||
context: String,
|
||||
category: String,
|
||||
user_id: String,
|
||||
},
|
||||
Text {
|
||||
text: String,
|
||||
instructions: String,
|
||||
context: String,
|
||||
category: String,
|
||||
user_id: String,
|
||||
},
|
||||
File {
|
||||
file_info: FileInfo,
|
||||
instructions: String,
|
||||
context: String,
|
||||
category: String,
|
||||
user_id: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl IngestionPayload {
|
||||
/// Creates ingestion payloads from the provided content, instructions, and files.
|
||||
/// Creates ingestion payloads from the provided content, context, and files.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `content` - Optional textual content to be ingested
|
||||
/// * `instructions` - Instructions for processing the ingress content
|
||||
/// * `context` - Context for processing the ingested content
|
||||
/// * `category` - Category to classify the ingested content
|
||||
/// * `files` - Vector of `FileInfo` objects containing information about uploaded files
|
||||
/// * `user_id` - Identifier of the user performing the ingestion
|
||||
@@ -40,7 +40,7 @@ impl IngestionPayload {
|
||||
/// (one per file/content type). On failure, returns an `AppError`.
|
||||
pub fn create_ingestion_payload(
|
||||
content: Option<String>,
|
||||
instructions: String,
|
||||
context: String,
|
||||
category: String,
|
||||
files: Vec<FileInfo>,
|
||||
user_id: &str,
|
||||
@@ -55,7 +55,7 @@ impl IngestionPayload {
|
||||
info!("Detected URL: {}", url);
|
||||
object_list.push(IngestionPayload::Url {
|
||||
url: url.to_string(),
|
||||
instructions: instructions.clone(),
|
||||
context: context.clone(),
|
||||
category: category.clone(),
|
||||
user_id: user_id.into(),
|
||||
});
|
||||
@@ -65,7 +65,7 @@ impl IngestionPayload {
|
||||
info!("Treating input as plain text");
|
||||
object_list.push(IngestionPayload::Text {
|
||||
text: input_content.to_string(),
|
||||
instructions: instructions.clone(),
|
||||
context: context.clone(),
|
||||
category: category.clone(),
|
||||
user_id: user_id.into(),
|
||||
});
|
||||
@@ -77,7 +77,7 @@ impl IngestionPayload {
|
||||
for file in files {
|
||||
object_list.push(IngestionPayload::File {
|
||||
file_info: file,
|
||||
instructions: instructions.clone(),
|
||||
context: context.clone(),
|
||||
category: category.clone(),
|
||||
user_id: user_id.into(),
|
||||
})
|
||||
@@ -126,14 +126,14 @@ mod tests {
|
||||
#[test]
|
||||
fn test_create_ingestion_payload_with_url() {
|
||||
let url = "https://example.com";
|
||||
let instructions = "Process this URL";
|
||||
let context = "Process this URL";
|
||||
let category = "websites";
|
||||
let user_id = "user123";
|
||||
let files = vec![];
|
||||
|
||||
let result = IngestionPayload::create_ingestion_payload(
|
||||
Some(url.to_string()),
|
||||
instructions.to_string(),
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
user_id,
|
||||
@@ -144,13 +144,13 @@ mod tests {
|
||||
match &result[0] {
|
||||
IngestionPayload::Url {
|
||||
url: payload_url,
|
||||
instructions: payload_instructions,
|
||||
context: payload_context,
|
||||
category: payload_category,
|
||||
user_id: payload_user_id,
|
||||
} => {
|
||||
// URL parser may normalize the URL by adding a trailing slash
|
||||
assert!(payload_url == &url.to_string() || payload_url == &format!("{}/", url));
|
||||
assert_eq!(payload_instructions, &instructions);
|
||||
assert_eq!(payload_context, &context);
|
||||
assert_eq!(payload_category, &category);
|
||||
assert_eq!(payload_user_id, &user_id);
|
||||
}
|
||||
@@ -161,14 +161,14 @@ mod tests {
|
||||
#[test]
|
||||
fn test_create_ingestion_payload_with_text() {
|
||||
let text = "This is some text content";
|
||||
let instructions = "Process this text";
|
||||
let context = "Process this text";
|
||||
let category = "notes";
|
||||
let user_id = "user123";
|
||||
let files = vec![];
|
||||
|
||||
let result = IngestionPayload::create_ingestion_payload(
|
||||
Some(text.to_string()),
|
||||
instructions.to_string(),
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
user_id,
|
||||
@@ -179,12 +179,12 @@ mod tests {
|
||||
match &result[0] {
|
||||
IngestionPayload::Text {
|
||||
text: payload_text,
|
||||
instructions: payload_instructions,
|
||||
context: payload_context,
|
||||
category: payload_category,
|
||||
user_id: payload_user_id,
|
||||
} => {
|
||||
assert_eq!(payload_text, text);
|
||||
assert_eq!(payload_instructions, instructions);
|
||||
assert_eq!(payload_context, context);
|
||||
assert_eq!(payload_category, category);
|
||||
assert_eq!(payload_user_id, user_id);
|
||||
}
|
||||
@@ -194,7 +194,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_create_ingestion_payload_with_file() {
|
||||
let instructions = "Process this file";
|
||||
let context = "Process this file";
|
||||
let category = "documents";
|
||||
let user_id = "user123";
|
||||
|
||||
@@ -208,7 +208,7 @@ mod tests {
|
||||
|
||||
let result = IngestionPayload::create_ingestion_payload(
|
||||
None,
|
||||
instructions.to_string(),
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
user_id,
|
||||
@@ -219,12 +219,12 @@ mod tests {
|
||||
match &result[0] {
|
||||
IngestionPayload::File {
|
||||
file_info: payload_file_info,
|
||||
instructions: payload_instructions,
|
||||
context: payload_context,
|
||||
category: payload_category,
|
||||
user_id: payload_user_id,
|
||||
} => {
|
||||
assert_eq!(payload_file_info.id, file_info.id);
|
||||
assert_eq!(payload_instructions, instructions);
|
||||
assert_eq!(payload_context, context);
|
||||
assert_eq!(payload_category, category);
|
||||
assert_eq!(payload_user_id, user_id);
|
||||
}
|
||||
@@ -235,7 +235,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_create_ingestion_payload_with_url_and_file() {
|
||||
let url = "https://example.com";
|
||||
let instructions = "Process this data";
|
||||
let context = "Process this data";
|
||||
let category = "mixed";
|
||||
let user_id = "user123";
|
||||
|
||||
@@ -249,7 +249,7 @@ mod tests {
|
||||
|
||||
let result = IngestionPayload::create_ingestion_payload(
|
||||
Some(url.to_string()),
|
||||
instructions.to_string(),
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
user_id,
|
||||
@@ -283,14 +283,14 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_create_ingestion_payload_empty_input() {
|
||||
let instructions = "Process something";
|
||||
let context = "Process something";
|
||||
let category = "empty";
|
||||
let user_id = "user123";
|
||||
let files = vec![];
|
||||
|
||||
let result = IngestionPayload::create_ingestion_payload(
|
||||
None,
|
||||
instructions.to_string(),
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
user_id,
|
||||
@@ -308,14 +308,14 @@ mod tests {
|
||||
#[test]
|
||||
fn test_create_ingestion_payload_with_empty_text() {
|
||||
let text = ""; // Empty text
|
||||
let instructions = "Process this";
|
||||
let context = "Process this";
|
||||
let category = "notes";
|
||||
let user_id = "user123";
|
||||
let files = vec![];
|
||||
|
||||
let result = IngestionPayload::create_ingestion_payload(
|
||||
Some(text.to_string()),
|
||||
instructions.to_string(),
|
||||
context.to_string(),
|
||||
category.to_string(),
|
||||
files,
|
||||
user_id,
|
||||
|
||||
@@ -110,7 +110,7 @@ mod tests {
|
||||
fn create_test_payload(user_id: &str) -> IngestionPayload {
|
||||
IngestionPayload::Text {
|
||||
text: "Test content".to_string(),
|
||||
instructions: "Test instructions".to_string(),
|
||||
context: "Test context".to_string(),
|
||||
category: "Test category".to_string(),
|
||||
user_id: user_id.to_string(),
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ Example response formats:
|
||||
"I found relevant information in multiple entries: [explanation...]"
|
||||
"I apologize, but the provided context doesn't contain information about [topic]""#;
|
||||
|
||||
pub static DEFAULT_INGRESS_ANALYSIS_SYSTEM_PROMPT: &str = r#"You are an AI assistant. You will receive a text content, along with user instructions and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities from the database, do not replicate these! Your task is to create meaningful knowledge entities from the submitted content. Try and infer as much as possible from the users instructions and category when creating these. If the user submits a large content, create more general entities. If the user submits a narrow and precise content, try and create precise knowledge entities.
|
||||
pub static DEFAULT_INGRESS_ANALYSIS_SYSTEM_PROMPT: &str = r#"You are an AI assistant. You will receive a text content, along with user context and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities from the database, do not replicate these! Your task is to create meaningful knowledge entities from the submitted content. Try and infer as much as possible from the users context and category when creating these. If the user submits a large content, create more general entities. If the user submits a narrow and precise content, try and create precise knowledge entities.
|
||||
|
||||
The JSON should have the following structure:
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ stored_object!(TextContent, "text_content", {
|
||||
text: String,
|
||||
file_info: Option<FileInfo>,
|
||||
url_info: Option<UrlInfo>,
|
||||
instructions: String,
|
||||
context: Option<String>,
|
||||
category: String,
|
||||
user_id: String
|
||||
});
|
||||
@@ -24,7 +24,7 @@ stored_object!(TextContent, "text_content", {
|
||||
impl TextContent {
|
||||
pub fn new(
|
||||
text: String,
|
||||
instructions: String,
|
||||
context: Option<String>,
|
||||
category: String,
|
||||
file_info: Option<FileInfo>,
|
||||
url_info: Option<UrlInfo>,
|
||||
@@ -38,7 +38,7 @@ impl TextContent {
|
||||
text,
|
||||
file_info,
|
||||
url_info,
|
||||
instructions,
|
||||
context,
|
||||
category,
|
||||
user_id,
|
||||
}
|
||||
@@ -46,7 +46,7 @@ impl TextContent {
|
||||
|
||||
pub async fn patch(
|
||||
id: &str,
|
||||
instructions: &str,
|
||||
context: &str,
|
||||
category: &str,
|
||||
text: &str,
|
||||
db: &SurrealDbClient,
|
||||
@@ -55,7 +55,7 @@ impl TextContent {
|
||||
|
||||
let _res: Option<Self> = db
|
||||
.update((Self::table_name(), id))
|
||||
.patch(PatchOp::replace("/instructions", instructions))
|
||||
.patch(PatchOp::replace("/context", context))
|
||||
.patch(PatchOp::replace("/category", category))
|
||||
.patch(PatchOp::replace("/text", text))
|
||||
.patch(PatchOp::replace("/updated_at", now))
|
||||
@@ -73,13 +73,13 @@ mod tests {
|
||||
async fn test_text_content_creation() {
|
||||
// Test basic object creation
|
||||
let text = "Test content text".to_string();
|
||||
let instructions = "Test instructions".to_string();
|
||||
let context = "Test context".to_string();
|
||||
let category = "Test category".to_string();
|
||||
let user_id = "user123".to_string();
|
||||
|
||||
let text_content = TextContent::new(
|
||||
text.clone(),
|
||||
instructions.clone(),
|
||||
Some(context.clone()),
|
||||
category.clone(),
|
||||
None,
|
||||
None,
|
||||
@@ -88,7 +88,7 @@ mod tests {
|
||||
|
||||
// Check that the fields are set correctly
|
||||
assert_eq!(text_content.text, text);
|
||||
assert_eq!(text_content.instructions, instructions);
|
||||
assert_eq!(text_content.context, Some(context));
|
||||
assert_eq!(text_content.category, category);
|
||||
assert_eq!(text_content.user_id, user_id);
|
||||
assert!(text_content.file_info.is_none());
|
||||
@@ -100,7 +100,7 @@ mod tests {
|
||||
async fn test_text_content_with_url() {
|
||||
// Test creating with URL
|
||||
let text = "Content with URL".to_string();
|
||||
let instructions = "URL instructions".to_string();
|
||||
let context = "URL context".to_string();
|
||||
let category = "URL category".to_string();
|
||||
let user_id = "user123".to_string();
|
||||
let title = "page_title".to_string();
|
||||
@@ -115,7 +115,7 @@ mod tests {
|
||||
|
||||
let text_content = TextContent::new(
|
||||
text.clone(),
|
||||
instructions.clone(),
|
||||
Some(context.clone()),
|
||||
category.clone(),
|
||||
None,
|
||||
url_info.clone(),
|
||||
@@ -137,13 +137,13 @@ mod tests {
|
||||
|
||||
// Create initial text content
|
||||
let initial_text = "Initial text".to_string();
|
||||
let initial_instructions = "Initial instructions".to_string();
|
||||
let initial_context = "Initial context".to_string();
|
||||
let initial_category = "Initial category".to_string();
|
||||
let user_id = "user123".to_string();
|
||||
|
||||
let text_content = TextContent::new(
|
||||
initial_text,
|
||||
initial_instructions,
|
||||
Some(initial_context),
|
||||
initial_category,
|
||||
None,
|
||||
None,
|
||||
@@ -158,20 +158,14 @@ mod tests {
|
||||
assert!(stored.is_some());
|
||||
|
||||
// New values for patch
|
||||
let new_instructions = "Updated instructions";
|
||||
let new_context = "Updated context";
|
||||
let new_category = "Updated category";
|
||||
let new_text = "Updated text content";
|
||||
|
||||
// Apply the patch
|
||||
TextContent::patch(
|
||||
&text_content.id,
|
||||
new_instructions,
|
||||
new_category,
|
||||
new_text,
|
||||
&db,
|
||||
)
|
||||
.await
|
||||
.expect("Failed to patch text content");
|
||||
TextContent::patch(&text_content.id, new_context, new_category, new_text, &db)
|
||||
.await
|
||||
.expect("Failed to patch text content");
|
||||
|
||||
// Retrieve the updated content
|
||||
let updated: Option<TextContent> = db
|
||||
@@ -183,7 +177,7 @@ mod tests {
|
||||
let updated_content = updated.unwrap();
|
||||
|
||||
// Verify the updates
|
||||
assert_eq!(updated_content.instructions, new_instructions);
|
||||
assert_eq!(updated_content.context, Some(new_context.to_string()));
|
||||
assert_eq!(updated_content.category, new_category);
|
||||
assert_eq!(updated_content.text, new_text);
|
||||
assert!(updated_content.updated_at > text_content.updated_at);
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -94,7 +94,7 @@ pub async fn show_text_content_edit_form(
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct PatchTextContentParams {
|
||||
instructions: String,
|
||||
context: String,
|
||||
category: String,
|
||||
text: String,
|
||||
}
|
||||
@@ -106,14 +106,7 @@ pub async fn patch_text_content(
|
||||
) -> Result<impl IntoResponse, HtmlError> {
|
||||
User::get_and_validate_text_content(&id, &user.id, &state.db).await?;
|
||||
|
||||
TextContent::patch(
|
||||
&id,
|
||||
&form.instructions,
|
||||
&form.category,
|
||||
&form.text,
|
||||
&state.db,
|
||||
)
|
||||
.await?;
|
||||
TextContent::patch(&id, &form.context, &form.category, &form.text, &state.db).await?;
|
||||
|
||||
let text_contents = User::get_text_contents(&user.id, &state.db).await?;
|
||||
let categories = User::get_user_categories(&user.id, &state.db).await?;
|
||||
|
||||
@@ -29,7 +29,7 @@ use crate::html_state::HtmlState;
|
||||
#[derive(Serialize)]
|
||||
pub struct IndexPageData {
|
||||
user: Option<User>,
|
||||
latest_text_contents: Vec<TextContent>,
|
||||
text_contents: Vec<TextContent>,
|
||||
active_jobs: Vec<IngestionTask>,
|
||||
conversation_archive: Vec<Conversation>,
|
||||
}
|
||||
@@ -39,20 +39,12 @@ pub async fn index_handler(
|
||||
auth: AuthSessionType,
|
||||
) -> Result<impl IntoResponse, HtmlError> {
|
||||
let Some(user) = auth.current_user else {
|
||||
return Ok(TemplateResponse::new_template(
|
||||
"index/index.html",
|
||||
IndexPageData {
|
||||
user: None,
|
||||
latest_text_contents: vec![],
|
||||
active_jobs: vec![],
|
||||
conversation_archive: vec![],
|
||||
},
|
||||
));
|
||||
return Ok(TemplateResponse::redirect("/"));
|
||||
};
|
||||
|
||||
let active_jobs = User::get_unfinished_ingestion_tasks(&user.id, &state.db).await?;
|
||||
|
||||
let latest_text_contents = User::get_latest_text_contents(&user.id, &state.db).await?;
|
||||
let text_contents = User::get_latest_text_contents(&user.id, &state.db).await?;
|
||||
|
||||
let conversation_archive = User::get_user_conversations(&user.id, &state.db).await?;
|
||||
|
||||
@@ -60,7 +52,7 @@ pub async fn index_handler(
|
||||
"index/index.html",
|
||||
IndexPageData {
|
||||
user: Some(user),
|
||||
latest_text_contents,
|
||||
text_contents,
|
||||
active_jobs,
|
||||
conversation_archive,
|
||||
},
|
||||
|
||||
@@ -54,7 +54,7 @@ pub async fn hide_ingress_form(
|
||||
#[derive(Debug, TryFromMultipart)]
|
||||
pub struct IngressParams {
|
||||
pub content: Option<String>,
|
||||
pub instructions: String,
|
||||
pub context: String,
|
||||
pub category: String,
|
||||
#[form_data(limit = "10000000")] // Adjust limit as needed
|
||||
#[form_data(default)]
|
||||
@@ -68,7 +68,7 @@ pub async fn process_ingress_form(
|
||||
) -> Result<impl IntoResponse, HtmlError> {
|
||||
#[derive(Serialize)]
|
||||
pub struct IngressFormData {
|
||||
instructions: String,
|
||||
context: String,
|
||||
content: String,
|
||||
category: String,
|
||||
error: String,
|
||||
@@ -78,7 +78,7 @@ pub async fn process_ingress_form(
|
||||
return Ok(TemplateResponse::new_template(
|
||||
"index/signed_in/ingress_form.html",
|
||||
IngressFormData {
|
||||
instructions: input.instructions.clone(),
|
||||
context: input.context.clone(),
|
||||
content: input.content.clone().unwrap_or_default(),
|
||||
category: input.category.clone(),
|
||||
error: "You need to either add files or content".to_string(),
|
||||
@@ -98,7 +98,7 @@ pub async fn process_ingress_form(
|
||||
|
||||
let payloads = IngestionPayload::create_ingestion_payload(
|
||||
input.content,
|
||||
input.instructions,
|
||||
input.context,
|
||||
input.category,
|
||||
file_infos,
|
||||
user.id.as_str(),
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<div class="grid sm:grid-cols-2 lg:grid-cols-3 gap-4" id="text_content_cards">
|
||||
<div class="columns-1 lg:columns-2 2xl:columns-3 gap-4" id="text_content_cards">
|
||||
{% for text_content in text_contents %}
|
||||
<div class="card min-w-72 bg-base-100 shadow" hx-get="/content/{{ text_content.id }}/read" hx-target="#modal"
|
||||
hx-swap="innerHTML">
|
||||
<div class="card cursor-pointer min-w-72 mb-4 bg-base-100 shadow break-inside-avoid-column"
|
||||
hx-get="/content/{{ text_content.id }}/read" hx-target="#modal" hx-swap="innerHTML">
|
||||
{% if text_content.url_info %}
|
||||
<figure>
|
||||
<img src="/file/{{text_content.url_info.image_id}}" alt="website screenshot" />
|
||||
@@ -24,10 +24,10 @@
|
||||
{{ text_content.created_at | datetimeformat(format="short", tz=user.timezone) }}
|
||||
</p>
|
||||
<div class="badge badge-soft badge-secondary mr-2">{{ text_content.category }}</div>
|
||||
<div class="flex gap-2">
|
||||
<div class="flex gap-2" hx-on:click="event.stopPropagation()">
|
||||
{% if text_content.url_info %}
|
||||
<button class="btn-btn-square btn-ghost btn-sm">
|
||||
<a href="{{text_content.url_info.url}}">
|
||||
<a href="{{text_content.url_info.url}}" target="_blank" rel="noopener noreferrer">
|
||||
{% include "icons/globe_icon.html" %}
|
||||
</a>
|
||||
</button>
|
||||
|
||||
@@ -15,8 +15,8 @@ class="flex flex-col flex-1 h-full"
|
||||
<h3 class="text-lg font-bold">Edit Content</h3>
|
||||
<div class="form-control">
|
||||
<label class="floating-label">
|
||||
<span class="label-text">Instructions</span>
|
||||
<input type="text" name="instructions" value="{{ text_content.instructions }}" class="w-full input input-bordered">
|
||||
<span class="label-text">Context</span>
|
||||
<input type="text" name="context" value="{{ text_content.context }}" class="w-full input input-bordered">
|
||||
</label>
|
||||
</div>
|
||||
<div class="form-control">
|
||||
|
||||
@@ -1,8 +1,4 @@
|
||||
{% extends "body_base.html" %}
|
||||
{% block main %}
|
||||
{% if user %}
|
||||
{% include 'index/signed_in/base.html' %}
|
||||
{% else %}
|
||||
{% include 'auth/signin_form.html' %}
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
@@ -1,7 +1,12 @@
|
||||
<div class="flex grow mt-2 sm:mt-4">
|
||||
<div class="flex justify-center grow mt-2 sm:mt-4">
|
||||
<div class="container">
|
||||
{% include 'index/signed_in/searchbar.html' %}
|
||||
|
||||
<div>
|
||||
<h2 class="font-extrabold">Recent content</h2>
|
||||
{% include "content/content_list.html" %}
|
||||
</div>
|
||||
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 shadow my-10">
|
||||
{% include "index/signed_in/active_jobs.html" %}
|
||||
|
||||
|
||||
@@ -7,18 +7,17 @@ enctype="multipart/form-data"
|
||||
<h3 class="text-lg font-bold">Add new content</h3>
|
||||
<div class="form-control">
|
||||
<label class="floating-label">
|
||||
<span>Instructions</span>
|
||||
<textarea name="instructions" class="textarea w-full validator"
|
||||
placeholder="Enter instructions for the AI here, help it understand what its seeing or how it should relate to the database"
|
||||
required>{{ instructions }}</textarea>
|
||||
<div class="validator-hint hidden">Instructions are required</div>
|
||||
<span>Content</span>
|
||||
<textarea name="content" class="textarea input-bordered w-full"
|
||||
placeholder="Enter the content you want to ingest, it can be an URL or a text snippet">{{ content }}</textarea>
|
||||
</label>
|
||||
</div>
|
||||
<div class="form-control">
|
||||
<label class="floating-label">
|
||||
<span>Content</span>
|
||||
<textarea name="content" class="textarea input-bordered w-full"
|
||||
placeholder="Enter the content you want to ingress, it can be an URL or a text snippet">{{ content }}</textarea>
|
||||
<span>Context</span>
|
||||
<textarea name="context" class="textarea w-full"
|
||||
placeholder="Enter context for the AI here, help it understand what its seeing or how it should relate to the database">{{
|
||||
context }}</textarea>
|
||||
</label>
|
||||
</div>
|
||||
<div class="form-control">
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{% block latest_content_section %}
|
||||
<ul id="latest_content_section" class="list">
|
||||
<li class="py-4 text-center font-bold tracking-wide">Recently added content</li>
|
||||
{% for item in latest_text_contents %}
|
||||
{% for item in text_contents %}
|
||||
<li class="list-row">
|
||||
<div class="bg-accent rounded-box size-10 flex justify-center items-center text-accent-content">
|
||||
{% if item.url_info %}
|
||||
@@ -27,8 +27,8 @@
|
||||
<span class="badge badge-xs badge-accent ml-1">{{item.category}}</span>
|
||||
</div>
|
||||
</div>
|
||||
<p class="list-col-wrap text-xs [&:before]:content-['Instructions:_'] [&:before]:opacity-60">
|
||||
{{item.instructions}}
|
||||
<p class="list-col-wrap text-xs [&:before]:content-['Context:_'] [&:before]:opacity-60">
|
||||
{{item.context}}
|
||||
</p>
|
||||
<button class="btn btn-disabled btn-square btn-ghost btn-sm">
|
||||
{% include "icons/edit_icon.html" %}
|
||||
|
||||
@@ -40,17 +40,17 @@ impl IngestionEnricher {
|
||||
pub async fn analyze_content(
|
||||
&self,
|
||||
category: &str,
|
||||
instructions: &str,
|
||||
context: Option<&str>,
|
||||
text: &str,
|
||||
user_id: &str,
|
||||
) -> Result<LLMEnrichmentResult, AppError> {
|
||||
info!("getting similar entitities");
|
||||
let similar_entities = self
|
||||
.find_similar_entities(category, instructions, text, user_id)
|
||||
.find_similar_entities(category, context, text, user_id)
|
||||
.await?;
|
||||
info!("got similar entitities");
|
||||
let llm_request = self
|
||||
.prepare_llm_request(category, instructions, text, &similar_entities)
|
||||
.prepare_llm_request(category, context, text, &similar_entities)
|
||||
.await?;
|
||||
self.perform_analysis(llm_request).await
|
||||
}
|
||||
@@ -58,13 +58,13 @@ impl IngestionEnricher {
|
||||
async fn find_similar_entities(
|
||||
&self,
|
||||
category: &str,
|
||||
instructions: &str,
|
||||
context: Option<&str>,
|
||||
text: &str,
|
||||
user_id: &str,
|
||||
) -> Result<Vec<KnowledgeEntity>, AppError> {
|
||||
let input_text = format!(
|
||||
"content: {}, category: {}, user_instructions: {}",
|
||||
text, category, instructions
|
||||
"content: {}, category: {}, user_context: {:?}",
|
||||
text, category, context
|
||||
);
|
||||
|
||||
retrieve_entities(&self.db_client, &self.openai_client, &input_text, user_id).await
|
||||
@@ -73,7 +73,7 @@ impl IngestionEnricher {
|
||||
async fn prepare_llm_request(
|
||||
&self,
|
||||
category: &str,
|
||||
instructions: &str,
|
||||
context: Option<&str>,
|
||||
text: &str,
|
||||
similar_entities: &[KnowledgeEntity],
|
||||
) -> Result<CreateChatCompletionRequest, AppError> {
|
||||
@@ -93,8 +93,8 @@ impl IngestionEnricher {
|
||||
.collect::<Vec<_>>());
|
||||
|
||||
let user_message = format!(
|
||||
"Category:\n{}\nInstructions:\n{}\nContent:\n{}\nExisting KnowledgeEntities in database:\n{}",
|
||||
category, instructions, text, entities_json
|
||||
"Category:\n{}\ncontext:\n{:?}\nContent:\n{}\nExisting KnowledgeEntities in database:\n{}",
|
||||
category, context, text, entities_json
|
||||
);
|
||||
|
||||
debug!("Prepared LLM request message: {}", user_message);
|
||||
|
||||
@@ -113,7 +113,7 @@ impl IngestionPipeline {
|
||||
analyser
|
||||
.analyze_content(
|
||||
&content.category,
|
||||
&content.instructions,
|
||||
content.context.as_deref(),
|
||||
&content.text,
|
||||
&content.user_id,
|
||||
)
|
||||
|
||||
@@ -16,7 +16,7 @@ use common::{
|
||||
},
|
||||
};
|
||||
use dom_smoothie::{Article, Readability, TextMode};
|
||||
use headless_chrome::{Browser, LaunchOptionsBuilder};
|
||||
use headless_chrome::Browser;
|
||||
use std::io::{Seek, SeekFrom};
|
||||
use tempfile::NamedTempFile;
|
||||
use tracing::{error, info};
|
||||
@@ -28,14 +28,14 @@ pub async fn to_text_content(
|
||||
match ingestion_payload {
|
||||
IngestionPayload::Url {
|
||||
url,
|
||||
instructions,
|
||||
context,
|
||||
category,
|
||||
user_id,
|
||||
} => {
|
||||
let (article, file_info) = fetch_article_from_url(&url, db, &user_id).await?;
|
||||
Ok(TextContent::new(
|
||||
article.text_content.into(),
|
||||
instructions,
|
||||
Some(context),
|
||||
category,
|
||||
None,
|
||||
Some(UrlInfo {
|
||||
@@ -48,12 +48,12 @@ pub async fn to_text_content(
|
||||
}
|
||||
IngestionPayload::Text {
|
||||
text,
|
||||
instructions,
|
||||
context,
|
||||
category,
|
||||
user_id,
|
||||
} => Ok(TextContent::new(
|
||||
text,
|
||||
instructions,
|
||||
Some(context),
|
||||
category,
|
||||
None,
|
||||
None,
|
||||
@@ -61,14 +61,14 @@ pub async fn to_text_content(
|
||||
)),
|
||||
IngestionPayload::File {
|
||||
file_info,
|
||||
instructions,
|
||||
context,
|
||||
category,
|
||||
user_id,
|
||||
} => {
|
||||
let text = extract_text_from_file(&file_info).await?;
|
||||
Ok(TextContent::new(
|
||||
text,
|
||||
instructions,
|
||||
Some(context),
|
||||
category,
|
||||
Some(file_info),
|
||||
None,
|
||||
|
||||
Reference in New Issue
Block a user