release: 1.0.5

This commit is contained in:
Per Stark
2026-06-24 22:02:31 +02:00
parent ba3fd6ed46
commit d273390de8
118 changed files with 989 additions and 690 deletions
+1 -1
View File
@@ -1,7 +1,7 @@
[package]
name = "retrieval-pipeline"
version = "0.1.0"
edition = "2021"
edition = "2024"
license = "AGPL-3.0-or-later"
[lints]
+13 -11
View File
@@ -9,11 +9,11 @@ use async_openai::{
},
};
use common::storage::types::{
message::{format_history, Message},
message::{Message, format_history},
system_settings::SystemSettings,
};
use serde::Deserialize;
use serde_json::{json, Value};
use serde_json::{Value, json};
/// JSON schema describing the structured chat answer (answer text + references).
fn get_query_response_schema() -> Value {
@@ -62,16 +62,18 @@ impl LLMResponseFormat {
pub fn chunks_to_chat_context(chunks: &[crate::RetrievedChunk]) -> Value {
use crate::round_score;
serde_json::json!(chunks
.iter()
.map(|chunk| {
serde_json::json!({
"id": chunk.chunk.id,
"content": chunk.chunk.chunk,
"score": round_score(chunk.score),
serde_json::json!(
chunks
.iter()
.map(|chunk| {
serde_json::json!({
"id": chunk.chunk.id,
"content": chunk.chunk.chunk,
"score": round_score(chunk.score),
})
})
})
.collect::<Vec<_>>())
.collect::<Vec<_>>()
)
}
pub fn create_user_message_with_history(
+3 -3
View File
@@ -33,11 +33,11 @@ pub enum RetrievalOutput {
}
pub use pipeline::{
retrieved_entities_to_json, Diagnostics, RetrievalConfig, RetrievalParams, RetrievalTuning,
StageKind, StageTimings,
Diagnostics, RetrievalConfig, RetrievalParams, RetrievalTuning, StageKind, StageTimings,
retrieved_entities_to_json,
};
pub use query::normalize_fts_terms;
pub use scoring::{reciprocal_rank_fusion, RrfConfig, Scored};
pub use scoring::{RrfConfig, Scored, reciprocal_rank_fusion};
/// Round a score to three decimal places for JSON output.
pub(crate) fn round_score(value: f32) -> f64 {
+2 -2
View File
@@ -6,12 +6,12 @@ use common::{
use crate::scoring::Scored;
use crate::{reranking::RerankerLease, RetrievedChunk, RetrievedEntity};
use crate::{RetrievedChunk, RetrievedEntity, reranking::RerankerLease};
use super::{
RetrievalParams, StageKind, StageTimings,
config::RetrievalConfig,
diagnostics::{AssembleStats, Diagnostics, SearchStats},
RetrievalParams, StageKind, StageTimings,
};
/// Mutable working state threaded through every retrieval stage.
+21 -19
View File
@@ -6,7 +6,7 @@ mod stages;
pub use config::{RetrievalConfig, RetrievalTuning};
pub use diagnostics::Diagnostics;
use crate::{round_score, RetrievalOutput, RetrievedEntity};
use crate::{RetrievalOutput, RetrievedEntity, round_score};
use async_trait::async_trait;
use common::{error::AppError, storage::db::SurrealDbClient};
use std::time::{Duration, Instant};
@@ -188,23 +188,25 @@ pub async fn run_with_embedding_instrumented(
}
pub fn retrieved_entities_to_json(entities: &[RetrievedEntity]) -> serde_json::Value {
serde_json::json!(entities
.iter()
.map(|entry| {
serde_json::json!({
"KnowledgeEntity": {
"id": entry.entity.id,
"name": entry.entity.name,
"description": entry.entity.description,
"score": round_score(entry.score),
"chunks": entry.chunks.iter().map(|chunk| {
serde_json::json!({
"score": round_score(chunk.score),
"content": chunk.chunk.chunk
})
}).collect::<Vec<_>>()
}
serde_json::json!(
entities
.iter()
.map(|entry| {
serde_json::json!({
"KnowledgeEntity": {
"id": entry.entity.id,
"name": entry.entity.name,
"description": entry.entity.description,
"score": round_score(entry.score),
"chunks": entry.chunks.iter().map(|chunk| {
serde_json::json!({
"score": round_score(chunk.score),
"content": chunk.chunk.chunk
})
}).collect::<Vec<_>>()
}
})
})
})
.collect::<Vec<_>>())
.collect::<Vec<_>>()
)
}
+3 -3
View File
@@ -8,16 +8,16 @@ use std::{collections::HashMap, fmt::Write, sync::Arc};
use tracing::{debug, instrument, warn};
use crate::{
query::normalize_fts_terms,
scoring::{clamp_unit, min_max_normalize, reciprocal_rank_fusion, RrfConfig, Scored},
RetrievedChunk, RetrievedEntity,
query::normalize_fts_terms,
scoring::{RrfConfig, Scored, clamp_unit, min_max_normalize, reciprocal_rank_fusion},
};
use super::{
Stage, StageKind,
config::RetrievalTuning,
context::PipelineContext,
diagnostics::{AssembleStats, SearchStats},
Stage, StageKind,
};
#[derive(Debug, Clone, Copy)]
+1 -1
View File
@@ -2,8 +2,8 @@ use std::{
env, fs,
path::{Path, PathBuf},
sync::{
atomic::{AtomicUsize, Ordering},
Arc, Mutex,
atomic::{AtomicUsize, Ordering},
},
thread::available_parallelism,
};
+2 -2
View File
@@ -1,11 +1,11 @@
use std::{
cmp::Ordering,
collections::{hash_map::Entry, HashMap},
collections::{HashMap, hash_map::Entry},
sync::Arc,
};
use common::storage::types::{
knowledge_entity::KnowledgeEntity, text_chunk::TextChunk, StoredObject,
StoredObject, knowledge_entity::KnowledgeEntity, text_chunk::TextChunk,
};
/// Identifier access for retrieval fusion and sorting.