mirror of
https://github.com/perstarkse/minne.git
synced 2026-07-04 20:11:42 +02:00
release: 1.0.5
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "retrieval-pipeline"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
edition = "2024"
|
||||
license = "AGPL-3.0-or-later"
|
||||
|
||||
[lints]
|
||||
|
||||
@@ -9,11 +9,11 @@ use async_openai::{
|
||||
},
|
||||
};
|
||||
use common::storage::types::{
|
||||
message::{format_history, Message},
|
||||
message::{Message, format_history},
|
||||
system_settings::SystemSettings,
|
||||
};
|
||||
use serde::Deserialize;
|
||||
use serde_json::{json, Value};
|
||||
use serde_json::{Value, json};
|
||||
|
||||
/// JSON schema describing the structured chat answer (answer text + references).
|
||||
fn get_query_response_schema() -> Value {
|
||||
@@ -62,16 +62,18 @@ impl LLMResponseFormat {
|
||||
pub fn chunks_to_chat_context(chunks: &[crate::RetrievedChunk]) -> Value {
|
||||
use crate::round_score;
|
||||
|
||||
serde_json::json!(chunks
|
||||
.iter()
|
||||
.map(|chunk| {
|
||||
serde_json::json!({
|
||||
"id": chunk.chunk.id,
|
||||
"content": chunk.chunk.chunk,
|
||||
"score": round_score(chunk.score),
|
||||
serde_json::json!(
|
||||
chunks
|
||||
.iter()
|
||||
.map(|chunk| {
|
||||
serde_json::json!({
|
||||
"id": chunk.chunk.id,
|
||||
"content": chunk.chunk.chunk,
|
||||
"score": round_score(chunk.score),
|
||||
})
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>())
|
||||
.collect::<Vec<_>>()
|
||||
)
|
||||
}
|
||||
|
||||
pub fn create_user_message_with_history(
|
||||
|
||||
@@ -33,11 +33,11 @@ pub enum RetrievalOutput {
|
||||
}
|
||||
|
||||
pub use pipeline::{
|
||||
retrieved_entities_to_json, Diagnostics, RetrievalConfig, RetrievalParams, RetrievalTuning,
|
||||
StageKind, StageTimings,
|
||||
Diagnostics, RetrievalConfig, RetrievalParams, RetrievalTuning, StageKind, StageTimings,
|
||||
retrieved_entities_to_json,
|
||||
};
|
||||
pub use query::normalize_fts_terms;
|
||||
pub use scoring::{reciprocal_rank_fusion, RrfConfig, Scored};
|
||||
pub use scoring::{RrfConfig, Scored, reciprocal_rank_fusion};
|
||||
|
||||
/// Round a score to three decimal places for JSON output.
|
||||
pub(crate) fn round_score(value: f32) -> f64 {
|
||||
|
||||
@@ -6,12 +6,12 @@ use common::{
|
||||
|
||||
use crate::scoring::Scored;
|
||||
|
||||
use crate::{reranking::RerankerLease, RetrievedChunk, RetrievedEntity};
|
||||
use crate::{RetrievedChunk, RetrievedEntity, reranking::RerankerLease};
|
||||
|
||||
use super::{
|
||||
RetrievalParams, StageKind, StageTimings,
|
||||
config::RetrievalConfig,
|
||||
diagnostics::{AssembleStats, Diagnostics, SearchStats},
|
||||
RetrievalParams, StageKind, StageTimings,
|
||||
};
|
||||
|
||||
/// Mutable working state threaded through every retrieval stage.
|
||||
|
||||
@@ -6,7 +6,7 @@ mod stages;
|
||||
pub use config::{RetrievalConfig, RetrievalTuning};
|
||||
pub use diagnostics::Diagnostics;
|
||||
|
||||
use crate::{round_score, RetrievalOutput, RetrievedEntity};
|
||||
use crate::{RetrievalOutput, RetrievedEntity, round_score};
|
||||
use async_trait::async_trait;
|
||||
use common::{error::AppError, storage::db::SurrealDbClient};
|
||||
use std::time::{Duration, Instant};
|
||||
@@ -188,23 +188,25 @@ pub async fn run_with_embedding_instrumented(
|
||||
}
|
||||
|
||||
pub fn retrieved_entities_to_json(entities: &[RetrievedEntity]) -> serde_json::Value {
|
||||
serde_json::json!(entities
|
||||
.iter()
|
||||
.map(|entry| {
|
||||
serde_json::json!({
|
||||
"KnowledgeEntity": {
|
||||
"id": entry.entity.id,
|
||||
"name": entry.entity.name,
|
||||
"description": entry.entity.description,
|
||||
"score": round_score(entry.score),
|
||||
"chunks": entry.chunks.iter().map(|chunk| {
|
||||
serde_json::json!({
|
||||
"score": round_score(chunk.score),
|
||||
"content": chunk.chunk.chunk
|
||||
})
|
||||
}).collect::<Vec<_>>()
|
||||
}
|
||||
serde_json::json!(
|
||||
entities
|
||||
.iter()
|
||||
.map(|entry| {
|
||||
serde_json::json!({
|
||||
"KnowledgeEntity": {
|
||||
"id": entry.entity.id,
|
||||
"name": entry.entity.name,
|
||||
"description": entry.entity.description,
|
||||
"score": round_score(entry.score),
|
||||
"chunks": entry.chunks.iter().map(|chunk| {
|
||||
serde_json::json!({
|
||||
"score": round_score(chunk.score),
|
||||
"content": chunk.chunk.chunk
|
||||
})
|
||||
}).collect::<Vec<_>>()
|
||||
}
|
||||
})
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>())
|
||||
.collect::<Vec<_>>()
|
||||
)
|
||||
}
|
||||
|
||||
@@ -8,16 +8,16 @@ use std::{collections::HashMap, fmt::Write, sync::Arc};
|
||||
use tracing::{debug, instrument, warn};
|
||||
|
||||
use crate::{
|
||||
query::normalize_fts_terms,
|
||||
scoring::{clamp_unit, min_max_normalize, reciprocal_rank_fusion, RrfConfig, Scored},
|
||||
RetrievedChunk, RetrievedEntity,
|
||||
query::normalize_fts_terms,
|
||||
scoring::{RrfConfig, Scored, clamp_unit, min_max_normalize, reciprocal_rank_fusion},
|
||||
};
|
||||
|
||||
use super::{
|
||||
Stage, StageKind,
|
||||
config::RetrievalTuning,
|
||||
context::PipelineContext,
|
||||
diagnostics::{AssembleStats, SearchStats},
|
||||
Stage, StageKind,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
|
||||
@@ -2,8 +2,8 @@ use std::{
|
||||
env, fs,
|
||||
path::{Path, PathBuf},
|
||||
sync::{
|
||||
atomic::{AtomicUsize, Ordering},
|
||||
Arc, Mutex,
|
||||
atomic::{AtomicUsize, Ordering},
|
||||
},
|
||||
thread::available_parallelism,
|
||||
};
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
use std::{
|
||||
cmp::Ordering,
|
||||
collections::{hash_map::Entry, HashMap},
|
||||
collections::{HashMap, hash_map::Entry},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use common::storage::types::{
|
||||
knowledge_entity::KnowledgeEntity, text_chunk::TextChunk, StoredObject,
|
||||
StoredObject, knowledge_entity::KnowledgeEntity, text_chunk::TextChunk,
|
||||
};
|
||||
|
||||
/// Identifier access for retrieval fusion and sorting.
|
||||
|
||||
Reference in New Issue
Block a user