Skip to content

Commit d3d2448

Browse files
committed
add few-shot HumanEval
1 parent 696f7ae commit d3d2448

File tree

3 files changed

+5
-0
lines changed

3 files changed

+5
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"task_name": "codex_humaneval", "task_hash": "b271b0f127ae71cf79a80d6463f0c877", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "codex_humaneval", "task_core": "codex_humaneval", "limit": null, "split": "test", "num_shots": 3, "fewshot_seed": 1234, "primary_metric": "bits_per_byte_corr", "random_subsample_seed": 1234, "context_kwargs": {"answer_prefix": ""}, "generation_kwargs": {"max_gen_toks": 512, "do_sample": false, "temperature": 0.0, "stop_sequences": [], "repeats": 1}, "metric_kwargs": {"pass_at_ks": [1]}, "native_id_field": "task_id", "fewshot_source": null, "dataset_path": "openai_humaneval", "dataset_name": null, "use_chat_format": false, "version": 0.1, "revision": null, "compute_gold_bpb": true, "external_eval": null, "custom_kwargs": null, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "codex_humaneval:3shot:bpb::none"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.107417345046997, "current_date": "2025-05-19 20:42:07 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01JR1D5PVN5HA1E2SX4FNFAZN4", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JVGSX2MHV1Z5AY6TBD7F6HAS", "BEAKER_WORKLOAD_ID": "01JVGSX2MHNQK7G4K6C93E8WZR", "BEAKER_ENVIRONMENT_ID": "01JVGSX2MHNQK7G4K6C93E8WZR", "BEAKER_ASSIGNED_CPU_COUNT": "127.5", "BEAKER_ASSIGNED_GPU_COUNT": "4", "BEAKER_NODE_HOSTNAME": "neptune-cs-aus-264.reviz.ai2.in"}}
Binary file not shown.

src/olmo_eval/tasks.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2620,6 +2620,10 @@ def doc_to_label(self, doc) -> int:
26202620
OEEvalTask,
26212621
{"dataset_path": "codex_humaneval", "dataset_name": "gold_bpb_0shot", "metric_type": "bpb"},
26222622
),
2623+
"codex_humaneval_gold_bpb_3shot": (
2624+
OEEvalTask,
2625+
{"dataset_path": "codex_humaneval", "dataset_name": "gold_bpb_3shot", "metric_type": "bpb"},
2626+
),
26232627
"codex_mbpp_gold_bpb_0shot": (
26242628
OEEvalTask,
26252629
{"dataset_path": "codex_mbpp", "dataset_name": "gold_bpb_0shot", "metric_type": "bpb"},

0 commit comments

Comments
 (0)