Skip to content

Commit d386498

Browse files
Adding Basic Skills to in-loop eval (#7)
* basic skills task
* basic skills task
* basic skills coding
* basic_skills_arithmetic
* basic_skills_string_operations
* basic_skills_coding
* basic_skills_pattern
* basic_skills_common_knowledge
* basic_skills_logical_reasoning
* adding tasks to tasks.py
* requests.jsonl.gz
* requests.jsonl.gz
* lint
* prep for release v0.7.2

---------

Co-authored-by: David Heineman <[email protected]>
1 parent 8dd0b46 commit d386498

File tree

15 files changed

+183
-1
lines changed

15 files changed

+183
-1
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## Unreleased
99

10+
## [v0.7.2](https://github.com/allenai/OLMo-in-loop-evals/releases/tag/v0.7.2) - 2025-05-16
11+
12+
- Add basic skills evals
13+
1014
## [v0.7.1](https://github.com/allenai/OLMo-in-loop-evals/releases/tag/v0.7.1) - 2025-04-02
1115

1216
- Fix normalization to match the OLMES standard
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"task_name": "basic_skills_arithmetic",
3+
"task_hash": "56711b967c78d896ef51ba00aef5cfb0",
4+
"task_config": {
5+
"dataset_path": "basic_skills_arithmetic",
6+
"primary_metric": "acc_per_token",
7+
"split": "validation",
8+
"num_shots": 5,
9+
"metadata": {
10+
"regimes": [
11+
"OLMES-v0.1"
12+
],
13+
"alias": "basic_skills_arithmetic:rc::olmes"
14+
},
15+
"generation_kwargs": {},
16+
"context_kwargs": {},
17+
"dataset_name": "arithmetic",
18+
"task_name": "basic_skills_arithmetic",
19+
"version": 0,
20+
"task_core": "basic_skills_arithmetic"
21+
},
22+
"current_date": "2025-05-12 00:06:28 UTC"
23+
}
Binary file not shown.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"task_name": "basic_skills_coding",
3+
"task_hash": "d748d1d8ba506d3d234eed529ef62c3e",
4+
"task_config": {
5+
"dataset_path": "basic_skills_coding",
6+
"primary_metric": "acc_per_token",
7+
"split": "validation",
8+
"num_shots": 5,
9+
"metadata": {
10+
"regimes": [
11+
"OLMES-v0.1"
12+
],
13+
"alias": "basic_skills_coding:rc::olmes"
14+
},
15+
"generation_kwargs": {},
16+
"context_kwargs": {},
17+
"dataset_name": "coding",
18+
"task_name": "basic_skills_coding",
19+
"version": 0,
20+
"task_core": "basic_skills_coding"
21+
},
22+
"current_date": "2025-05-12 00:06:28 UTC"
23+
}
Binary file not shown.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"task_name": "basic_skills_common_knowledge",
3+
"task_hash": "51e88e759602f9085a8c779da375d833",
4+
"task_config": {
5+
"dataset_path": "basic_skills_common_knowledge",
6+
"primary_metric": "acc_per_token",
7+
"split": "validation",
8+
"num_shots": 5,
9+
"metadata": {
10+
"regimes": [
11+
"OLMES-v0.1"
12+
],
13+
"alias": "basic_skills_common_knowledge:rc::olmes"
14+
},
15+
"generation_kwargs": {},
16+
"context_kwargs": {},
17+
"dataset_name": "common_knowledge",
18+
"task_name": "basic_skills_common_knowledge",
19+
"version": 0,
20+
"task_core": "basic_skills_common_knowledge"
21+
},
22+
"current_date": "2025-05-12 00:06:28 UTC"
23+
}
Binary file not shown.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"task_name": "basic_skills_logical_reasoning",
3+
"task_hash": "a3d406a2f4224604b7e6bbf68050691d",
4+
"task_config": {
5+
"dataset_path": "basic_skills_logical_reasoning",
6+
"primary_metric": "acc_per_token",
7+
"split": "validation",
8+
"num_shots": 5,
9+
"metadata": {
10+
"regimes": [
11+
"OLMES-v0.1"
12+
],
13+
"alias": "basic_skills_logical_reasoning:rc::olmes"
14+
},
15+
"generation_kwargs": {},
16+
"context_kwargs": {},
17+
"dataset_name": "logical_reasoning",
18+
"task_name": "basic_skills_logical_reasoning",
19+
"version": 0,
20+
"task_core": "basic_skills_logical_reasoning"
21+
},
22+
"current_date": "2025-05-12 00:06:28 UTC"
23+
}
Binary file not shown.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"task_name": "basic_skills_pattern",
3+
"task_hash": "67983750bfb70a3b5cc34dcd67ee3c6a",
4+
"task_config": {
5+
"dataset_path": "basic_skills_pattern",
6+
"primary_metric": "acc_per_token",
7+
"split": "validation",
8+
"num_shots": 5,
9+
"metadata": {
10+
"regimes": [
11+
"OLMES-v0.1"
12+
],
13+
"alias": "basic_skills_pattern:rc::olmes"
14+
},
15+
"generation_kwargs": {},
16+
"context_kwargs": {},
17+
"dataset_name": "pattern",
18+
"task_name": "basic_skills_pattern",
19+
"version": 0,
20+
"task_core": "basic_skills_pattern"
21+
},
22+
"current_date": "2025-05-12 00:06:28 UTC"
23+
}

0 commit comments

Comments
 (0)