wip vector search

This commit is contained in:
Per Stark
2024-11-14 19:47:02 +01:00
parent abae94a478
commit 2f6106d87c
2 changed files with 13 additions and 4 deletions

View File

@@ -50,8 +50,8 @@ impl TextContent {
// Store TextContent // Store TextContent
let db_client = SurrealDbClient::new().await?; let db_client = SurrealDbClient::new().await?;
// db_client.query("REMOVE INDEX embeddings ON knowledge_entity").await?; db_client.query("REMOVE INDEX embeddings ON knowledge_entity").await?;
db_client.query("DEFINE INDEX embeddings ON knowledge_entity FIELDS embedding").await?; db_client.query("DEFINE INDEX embeddings ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536").await?;
// db_client.query("REBUILD INDEX IF EXISTS embeddings ON knowledge_entity").await?; // db_client.query("REBUILD INDEX IF EXISTS embeddings ON knowledge_entity").await?;
// Step 1: Send to LLM for analysis // Step 1: Send to LLM for analysis

View File

@@ -156,6 +156,7 @@ pub async fn create_json_ld(
text: &str, text: &str,
db_client: &Surreal<Client>, db_client: &Surreal<Client>,
) -> Result<LLMGraphAnalysisResult, ProcessingError> { ) -> Result<LLMGraphAnalysisResult, ProcessingError> {
let client = async_openai::Client::new();
// Get the nodes from the database // Get the nodes from the database
let entities: Vec<KnowledgeEntity> = db_client let entities: Vec<KnowledgeEntity> = db_client
.query("SELECT * FROM knowledge_entity") .query("SELECT * FROM knowledge_entity")
@@ -165,6 +166,15 @@ pub async fn create_json_ld(
info!("{:?}, {:?}", entity.name, entity.description); info!("{:?}, {:?}", entity.name, entity.description);
} }
let text_embeddings = generate_embedding(&client,text.to_string()).await?;
let closest_query = format!("SELECT *, vector::distance:knn() AS distance FROM knowledge_entity WHERE embedding {:?} ORDER BY distance", text_embeddings);
info!("{:?}", closest_query);
let closest_entities: Vec<KnowledgeEntity> = db_client.query(closest_query).await?.take(0)?;
info!("{:?}", closest_entities);
let deleted: Vec<KnowledgeEntity> = db_client.delete("knowledge_entity").await?; let deleted: Vec<KnowledgeEntity> = db_client.delete("knowledge_entity").await?;
info! {"{:?} KnowledgeEntities deleted", deleted.len()}; info! {"{:?} KnowledgeEntities deleted", deleted.len()};
@@ -176,7 +186,6 @@ pub async fn create_json_ld(
db_client.delete("knowledge_relationship").await?; db_client.delete("knowledge_relationship").await?;
info!("{:?} Relationships deleted", relationships_deleted.len()); info!("{:?} Relationships deleted", relationships_deleted.len());
let client = async_openai::Client::new();
let schema = json!({ let schema = json!({
"type": "object", "type": "object",
"properties": { "properties": {
@@ -259,7 +268,7 @@ pub async fn create_json_ld(
3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet. 3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.
4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo. 4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity" 5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity"
6. Only create relationships between existing KnowledgeEntities. 7. Only create relationships between existing KnowledgeEntities.
"#; "#;
let user_message = format!( let user_message = format!(