# Mirror of https://github.com/perstarkse/minne.git
# Synced 2026-03-20 08:34:31 +01:00
# Original file: 169 lines, 4.9 KiB, YAML
# Root of the dataset catalog.
# `default_dataset` names one of the `id` values listed under `datasets`
# (here: squad-v2). NOTE(review): presumably the dataset used when none is
# selected explicitly; confirm against the loader.
default_dataset: squad-v2
# One entry per dataset; each entry carries its own `slices` list.
datasets:
# Dataset entry "squad-v2" (SQuAD v2.0).
- id: squad-v2
  label: "SQuAD v2.0"
  category: "SQuAD v2.0"
  entity_suffix: "SQuAD"
  source_prefix: "squad"
  # File paths; NOTE(review): presumably relative to the working directory
  # of the consuming tool; confirm.
  raw: "data/raw/squad/dev-v2.0.json"
  converted: "data/converted/squad-minne.json"
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 2000.
  - id: squad-dev-200
    label: "SQuAD dev (200)"
    description: "Deterministic 200-case slice for local eval"
    limit: 200
    corpus_limit: 2000
    # Hex integer literal (resolves as an int under the YAML 1.1 and 1.2
    # core schemas). Keep it unquoted: quoting would turn it into a string.
    seed: 0x5eed2025
# Dataset entry "natural-questions-dev" (Natural Questions, dev set).
- id: natural-questions-dev
  label: "Natural Questions (dev)"
  category: "Natural Questions"
  entity_suffix: "Natural Questions"
  source_prefix: "nq"
  # File paths; NOTE(review): presumably relative to the working directory
  # of the consuming tool; confirm.
  raw: "data/raw/nq-dev/dev-all.jsonl"
  converted: "data/converted/nq-dev-minne.json"
  include_unanswerable: true
  slices:
  # Single slice: limit 200, corpus_limit 2000.
  - id: nq-dev-200
    label: "NQ dev (200)"
    description: "200-case slice of the dev set"
    limit: 200
    corpus_limit: 2000
    # Differs from the dataset-level value above (true).
    # NOTE(review): presumably a per-slice override; confirm precedence
    # against the consumer.
    include_unanswerable: false
    # Hex integer literal (resolves as an int under the YAML 1.1 and 1.2
    # core schemas). Keep it unquoted: quoting would turn it into a string.
    seed: 0x5eed2025
# Dataset entry "beir" (BEIR mix).
- id: beir
  label: "BEIR mix"
  category: "BEIR"
  entity_suffix: "BEIR"
  source_prefix: "beir"
  # `raw` has no file extension here, unlike `converted`.
  # NOTE(review): presumably a directory; confirm.
  raw: "data/raw/beir"
  converted: "data/converted/beir-minne.json"
  include_unanswerable: false
  slices:
  # Single slice: limit 600, corpus_limit 6000. The description names
  # eight sources.
  - id: beir-mix-600
    label: "BEIR mix (600)"
    description: "Balanced slice across FEVER, FiQA, HotpotQA, NFCorpus, Quora, TREC-COVID, SciFact, NQ-BEIR"
    limit: 600
    corpus_limit: 6000
    # Hex integer literal (resolves as an int under the YAML 1.1 and 1.2
    # core schemas). Keep it unquoted: quoting would turn it into a string.
    seed: 0x5eed2025
# Dataset entry "fever" (FEVER, BEIR).
- id: fever
  label: "FEVER (BEIR)"
  category: "FEVER"
  entity_suffix: "FEVER"
  source_prefix: "fever"
  # `raw` has no file extension here, unlike `converted`.
  # NOTE(review): presumably a directory; confirm.
  raw: "data/raw/fever"
  converted: "data/converted/fever-minne.json"
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: fever-test-200
    label: "FEVER test (200)"
    description: "200-case slice from BEIR test qrels"
    limit: 200
    corpus_limit: 5000
    # Hex integer literal (resolves as an int under the YAML 1.1 and 1.2
    # core schemas). Keep it unquoted: quoting would turn it into a string.
    seed: 0x5eed2025
# Dataset entry "fiqa" (FiQA-2018, BEIR).
- id: fiqa
  label: "FiQA-2018 (BEIR)"
  category: "FiQA-2018"
  entity_suffix: "FiQA"
  source_prefix: "fiqa"
  # `raw` has no file extension here, unlike `converted`.
  # NOTE(review): presumably a directory; confirm.
  raw: "data/raw/fiqa"
  converted: "data/converted/fiqa-minne.json"
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: fiqa-test-200
    label: "FiQA test (200)"
    description: "200-case slice from BEIR test qrels"
    limit: 200
    corpus_limit: 5000
    # Hex integer literal (resolves as an int under the YAML 1.1 and 1.2
    # core schemas). Keep it unquoted: quoting would turn it into a string.
    seed: 0x5eed2025
# Dataset entry "hotpotqa" (HotpotQA, BEIR).
- id: hotpotqa
  label: "HotpotQA (BEIR)"
  category: "HotpotQA"
  entity_suffix: "HotpotQA"
  source_prefix: "hotpotqa"
  # `raw` has no file extension here, unlike `converted`.
  # NOTE(review): presumably a directory; confirm.
  raw: "data/raw/hotpotqa"
  converted: "data/converted/hotpotqa-minne.json"
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: hotpotqa-test-200
    label: "HotpotQA test (200)"
    description: "200-case slice from BEIR test qrels"
    limit: 200
    corpus_limit: 5000
    # Hex integer literal (resolves as an int under the YAML 1.1 and 1.2
    # core schemas). Keep it unquoted: quoting would turn it into a string.
    seed: 0x5eed2025
# Dataset entry "nfcorpus" (NFCorpus, BEIR).
- id: nfcorpus
  label: "NFCorpus (BEIR)"
  category: "NFCorpus"
  entity_suffix: "NFCorpus"
  source_prefix: "nfcorpus"
  # `raw` has no file extension here, unlike `converted`.
  # NOTE(review): presumably a directory; confirm.
  raw: "data/raw/nfcorpus"
  converted: "data/converted/nfcorpus-minne.json"
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: nfcorpus-test-200
    label: "NFCorpus test (200)"
    description: "200-case slice from BEIR test qrels"
    limit: 200
    corpus_limit: 5000
    # Hex integer literal (resolves as an int under the YAML 1.1 and 1.2
    # core schemas). Keep it unquoted: quoting would turn it into a string.
    seed: 0x5eed2025
# Dataset entry "quora" (Quora).
- id: quora
  # NOTE(review): this label reads "(IR)" while the slice description below
  # refers to BEIR test qrels; left unchanged, confirm whether intentional.
  label: "Quora (IR)"
  category: "Quora"
  entity_suffix: "Quora"
  source_prefix: "quora"
  # `raw` has no file extension here, unlike `converted`.
  # NOTE(review): presumably a directory; confirm.
  raw: "data/raw/quora"
  converted: "data/converted/quora-minne.json"
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: quora-test-200
    label: "Quora test (200)"
    description: "200-case slice from BEIR test qrels"
    limit: 200
    corpus_limit: 5000
    # Hex integer literal (resolves as an int under the YAML 1.1 and 1.2
    # core schemas). Keep it unquoted: quoting would turn it into a string.
    seed: 0x5eed2025
# Dataset entry "trec-covid" (TREC-COVID, BEIR).
- id: trec-covid
  label: "TREC-COVID (BEIR)"
  category: "TREC-COVID"
  entity_suffix: "TREC-COVID"
  source_prefix: "trec-covid"
  # `raw` has no file extension here, unlike `converted`.
  # NOTE(review): presumably a directory; confirm.
  raw: "data/raw/trec-covid"
  converted: "data/converted/trec-covid-minne.json"
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: trec-covid-test-200
    label: "TREC-COVID test (200)"
    description: "200-case slice from BEIR test qrels"
    limit: 200
    corpus_limit: 5000
    # Hex integer literal (resolves as an int under the YAML 1.1 and 1.2
    # core schemas). Keep it unquoted: quoting would turn it into a string.
    seed: 0x5eed2025
# Dataset entry "scifact" (SciFact, BEIR).
- id: scifact
  label: "SciFact (BEIR)"
  category: "SciFact"
  entity_suffix: "SciFact"
  source_prefix: "scifact"
  # `raw` has no file extension here, unlike `converted`.
  # NOTE(review): presumably a directory; confirm.
  raw: "data/raw/scifact"
  converted: "data/converted/scifact-minne.json"
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 3000.
  - id: scifact-test-200
    label: "SciFact test (200)"
    description: "200-case slice from BEIR test qrels"
    limit: 200
    corpus_limit: 3000
    # Hex integer literal (resolves as an int under the YAML 1.1 and 1.2
    # core schemas). Keep it unquoted: quoting would turn it into a string.
    seed: 0x5eed2025
# Dataset entry "nq-beir" (Natural Questions, BEIR).
- id: nq-beir
  label: "Natural Questions (BEIR)"
  category: "Natural Questions"
  entity_suffix: "Natural Questions"
  source_prefix: "nq-beir"
  # `raw` has no file extension here, unlike `converted`.
  # NOTE(review): presumably a directory; confirm.
  raw: "data/raw/nq"
  converted: "data/converted/nq-beir-minne.json"
  include_unanswerable: false
  slices:
  # Single slice: limit 200, corpus_limit 5000.
  - id: nq-beir-test-200
    label: "NQ (BEIR) test (200)"
    description: "200-case slice from BEIR test qrels"
    limit: 200
    corpus_limit: 5000
    # Hex integer literal (resolves as an int under the YAML 1.1 and 1.2
    # core schemas). Keep it unquoted: quoting would turn it into a string.
    seed: 0x5eed2025