Files
minne/evaluations/manifest.yaml
2025-12-08 21:57:53 +01:00

169 lines
4.9 KiB
YAML

# Id of the default dataset; matches the `id` of the first entry under `datasets`.
# NOTE(review): presumably used when no dataset is selected explicitly — confirm against the loader.
default_dataset: squad-v2
# Dataset catalog: one list item per dataset, each identified by its `id`.
datasets:
# SQuAD v2.0 entry, read from the dev-v2.0 JSON file.
# NOTE(review): `raw` / `converted` look like input and output paths — confirm against the converter.
- id: squad-v2
  label: 'SQuAD v2.0'
  category: 'SQuAD v2.0'
  entity_suffix: 'SQuAD'
  source_prefix: 'squad'
  raw: 'data/raw/squad/dev-v2.0.json'
  converted: 'data/converted/squad-minne.json'
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 2000.
  - id: squad-dev-200
    label: 'SQuAD dev (200)'
    description: 'Deterministic 200-case slice for local eval'
    limit: 200
    corpus_limit: 2000
    # Hex integer literal; kept unquoted so it is read as a number, not a string.
    seed: 0x5eed2025
# Natural Questions dev entry, read from a JSONL file.
# NOTE(review): `raw` / `converted` look like input and output paths — confirm against the converter.
- id: natural-questions-dev
  label: 'Natural Questions (dev)'
  category: 'Natural Questions'
  entity_suffix: 'Natural Questions'
  source_prefix: 'nq'
  raw: 'data/raw/nq-dev/dev-all.jsonl'
  converted: 'data/converted/nq-dev-minne.json'
  include_unanswerable: true
  slices:
  # Single slice: limit 200, corpus_limit 2000.
  - id: nq-dev-200
    label: 'NQ dev (200)'
    description: '200-case slice of the dev set'
    limit: 200
    corpus_limit: 2000
    # Differs from the dataset-level value (true) above.
    # NOTE(review): presumably a per-slice override — confirm precedence in the loader.
    include_unanswerable: false
    # Hex integer literal; kept unquoted so it is read as a number, not a string.
    seed: 0x5eed2025
# BEIR mix entry; `raw` points at a directory rather than a single file.
# NOTE(review): `raw` / `converted` look like input and output paths — confirm against the converter.
- id: beir
  label: 'BEIR mix'
  category: 'BEIR'
  entity_suffix: 'BEIR'
  source_prefix: 'beir'
  raw: 'data/raw/beir'
  converted: 'data/converted/beir-minne.json'
  include_unanswerable: false
  slices:
  # Single slice: limit 600, corpus_limit 6000.
  - id: beir-mix-600
    label: 'BEIR mix (600)'
    description: 'Balanced slice across FEVER, FiQA, HotpotQA, NFCorpus, Quora, TREC-COVID, SciFact, NQ-BEIR'
    limit: 600
    corpus_limit: 6000
    # Hex integer literal; kept unquoted so it is read as a number, not a string.
    seed: 0x5eed2025
# FEVER entry; `raw` points at a directory rather than a single file.
# NOTE(review): `raw` / `converted` look like input and output paths — confirm against the converter.
- id: fever
  label: 'FEVER (BEIR)'
  category: 'FEVER'
  entity_suffix: 'FEVER'
  source_prefix: 'fever'
  raw: 'data/raw/fever'
  converted: 'data/converted/fever-minne.json'
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: fever-test-200
    label: 'FEVER test (200)'
    description: '200-case slice from BEIR test qrels'
    limit: 200
    corpus_limit: 5000
    # Hex integer literal; kept unquoted so it is read as a number, not a string.
    seed: 0x5eed2025
# FiQA-2018 entry; `raw` points at a directory rather than a single file.
# NOTE(review): `raw` / `converted` look like input and output paths — confirm against the converter.
- id: fiqa
  label: 'FiQA-2018 (BEIR)'
  category: 'FiQA-2018'
  entity_suffix: 'FiQA'
  source_prefix: 'fiqa'
  raw: 'data/raw/fiqa'
  converted: 'data/converted/fiqa-minne.json'
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: fiqa-test-200
    label: 'FiQA test (200)'
    description: '200-case slice from BEIR test qrels'
    limit: 200
    corpus_limit: 5000
    # Hex integer literal; kept unquoted so it is read as a number, not a string.
    seed: 0x5eed2025
# HotpotQA entry; `raw` points at a directory rather than a single file.
# NOTE(review): `raw` / `converted` look like input and output paths — confirm against the converter.
- id: hotpotqa
  label: 'HotpotQA (BEIR)'
  category: 'HotpotQA'
  entity_suffix: 'HotpotQA'
  source_prefix: 'hotpotqa'
  raw: 'data/raw/hotpotqa'
  converted: 'data/converted/hotpotqa-minne.json'
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: hotpotqa-test-200
    label: 'HotpotQA test (200)'
    description: '200-case slice from BEIR test qrels'
    limit: 200
    corpus_limit: 5000
    # Hex integer literal; kept unquoted so it is read as a number, not a string.
    seed: 0x5eed2025
# NFCorpus entry; `raw` points at a directory rather than a single file.
# NOTE(review): `raw` / `converted` look like input and output paths — confirm against the converter.
- id: nfcorpus
  label: 'NFCorpus (BEIR)'
  category: 'NFCorpus'
  entity_suffix: 'NFCorpus'
  source_prefix: 'nfcorpus'
  raw: 'data/raw/nfcorpus'
  converted: 'data/converted/nfcorpus-minne.json'
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: nfcorpus-test-200
    label: 'NFCorpus test (200)'
    description: '200-case slice from BEIR test qrels'
    limit: 200
    corpus_limit: 5000
    # Hex integer literal; kept unquoted so it is read as a number, not a string.
    seed: 0x5eed2025
# Quora entry; `raw` points at a directory rather than a single file.
# NOTE(review): `raw` / `converted` look like input and output paths — confirm against the converter.
- id: quora
  # NOTE(review): label says "(IR)" while the slice description references BEIR qrels
  # and sibling entries use "(BEIR)" — confirm this wording is intentional.
  label: 'Quora (IR)'
  category: 'Quora'
  entity_suffix: 'Quora'
  source_prefix: 'quora'
  raw: 'data/raw/quora'
  converted: 'data/converted/quora-minne.json'
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: quora-test-200
    label: 'Quora test (200)'
    description: '200-case slice from BEIR test qrels'
    limit: 200
    corpus_limit: 5000
    # Hex integer literal; kept unquoted so it is read as a number, not a string.
    seed: 0x5eed2025
# TREC-COVID entry; `raw` points at a directory rather than a single file.
# NOTE(review): `raw` / `converted` look like input and output paths — confirm against the converter.
- id: trec-covid
  label: 'TREC-COVID (BEIR)'
  category: 'TREC-COVID'
  entity_suffix: 'TREC-COVID'
  source_prefix: 'trec-covid'
  raw: 'data/raw/trec-covid'
  converted: 'data/converted/trec-covid-minne.json'
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: trec-covid-test-200
    label: 'TREC-COVID test (200)'
    description: '200-case slice from BEIR test qrels'
    limit: 200
    corpus_limit: 5000
    # Hex integer literal; kept unquoted so it is read as a number, not a string.
    seed: 0x5eed2025
# SciFact entry; `raw` points at a directory rather than a single file.
# NOTE(review): `raw` / `converted` look like input and output paths — confirm against the converter.
- id: scifact
  label: 'SciFact (BEIR)'
  category: 'SciFact'
  entity_suffix: 'SciFact'
  source_prefix: 'scifact'
  raw: 'data/raw/scifact'
  converted: 'data/converted/scifact-minne.json'
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 3000.
  - id: scifact-test-200
    label: 'SciFact test (200)'
    description: '200-case slice from BEIR test qrels'
    limit: 200
    corpus_limit: 3000
    # Hex integer literal; kept unquoted so it is read as a number, not a string.
    seed: 0x5eed2025
# Natural Questions (BEIR variant) entry; `raw` points at a directory rather than a single file.
# NOTE(review): `raw` / `converted` look like input and output paths — confirm against the converter.
- id: nq-beir
  label: 'Natural Questions (BEIR)'
  # NOTE(review): `category` and `entity_suffix` are identical to the
  # `natural-questions-dev` entry — confirm consumers distinguish the two by
  # `id` / `source_prefix` rather than by these fields.
  category: 'Natural Questions'
  entity_suffix: 'Natural Questions'
  source_prefix: 'nq-beir'
  raw: 'data/raw/nq'
  converted: 'data/converted/nq-beir-minne.json'
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: nq-beir-test-200
    label: 'NQ (BEIR) test (200)'
    description: '200-case slice from BEIR test qrels'
    limit: 200
    corpus_limit: 5000
    # Hex integer literal; kept unquoted so it is read as a number, not a string.
    seed: 0x5eed2025