Skip to content

Commit 2f01ec8

Browse files
Add in-loop gen tasks (#5)
* add new gen tasks
1 parent 5bc2920 commit 2f01ec8

File tree

22 files changed

+254
-146
lines changed

22 files changed

+254
-146
lines changed

.github/actions/setup-venv/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ runs:
3030
# Get the exact Python version to use in the cache key.
3131
echo "PYTHON_VERSION=$(python --version)" >> $GITHUB_ENV
3232
33-
- uses: actions/cache@v2
33+
- uses: actions/cache@v4
3434
id: virtualenv-cache
3535
with:
3636
path: .venv
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"task_name": "codex_humaneval", "task_hash": "f9ca57507a10bd02b963e82a02ce2c5e", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "codex_humaneval", "task_core": "codex_humaneval", "limit": null, "split": "test", "num_shots": 0, "fewshot_seed": 1234, "primary_metric": "bits_per_byte", "random_subsample_seed": 1234, "context_kwargs": {"answer_prefix": "Here is the completed function:\n\n```python\n"}, "generation_kwargs": {"max_gen_toks": 512, "do_sample": false, "temperature": 0.0, "stop_sequences": ["\nclass", "\nif", "\nprint", "\n#", "\n```", "\n```\n\n", "<|eot_id|>"], "repeats": 1}, "metric_kwargs": {"pass_at_ks": [1]}, "native_id_field": "task_id", "fewshot_source": null, "dataset_path": "openai_humaneval", "dataset_name": null, "use_chat_format": null, "version": 0.1, "revision": null, "compute_gold_bpb": true, "metadata": {"alias": "codex_humaneval::bpb"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.1126320362091064, "current_date": "2025-03-08 02:22:57 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01J8QFWGN4DC8VC8NEF9XT1CHF", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JNRVCYXMHRVVX4W9ED2EPQMT", "BEAKER_WORKLOAD_ID": "01JNRVCYXMK772PE71J0EWXTS5", "BEAKER_ENVIRONMENT_ID": "01JNRVCYXMK772PE71J0EWXTS5", "BEAKER_ASSIGNED_CPU_COUNT": "124", "BEAKER_ASSIGNED_GPU_COUNT": "4", "BEAKER_NODE_HOSTNAME": "neptune-cs-aus-262.reviz.ai2.in"}}
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"task_name": "mbpp", "task_hash": "5e95b21e44ad49ff8acbcc9f7a96c083", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "mbpp", "task_core": "mbpp", "limit": null, "split": "test", "num_shots": 0, "fewshot_seed": 1234, "primary_metric": "bits_per_byte", "random_subsample_seed": 1234, "context_kwargs": {"assistant_prefix": "\nBelow is a Python script with a self-contained function that solves the problem and passes corresponding tests:\n```python\n", "prompt_variant": "evalplus"}, "generation_kwargs": {"max_gen_toks": 512, "do_sample": false, "temperature": 0.0, "stop_sequences": ["\nclass", "\nassert", "\n\"\"\"", "\nprint", "\nif", "\n```", "\n#", "\n<|/", "<|eot_id|>"], "repeats": 1}, "metric_kwargs": {"pass_at_ks": [1]}, "native_id_field": "task_id", "fewshot_source": "Original:MBPP", "dataset_path": "google-research-datasets/mbpp", "dataset_name": null, "use_chat_format": false, "version": 0.1, "revision": null, "compute_gold_bpb": true, "metadata": {"alias": "mbpp::bpb"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 0.7109775543212891, "current_date": "2025-03-08 02:22:56 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01J8QFWGN4DC8VC8NEF9XT1CHF", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JNRVCYXMHRVVX4W9ED2EPQMT", "BEAKER_WORKLOAD_ID": "01JNRVCYXMK772PE71J0EWXTS5", "BEAKER_ENVIRONMENT_ID": "01JNRVCYXMK772PE71J0EWXTS5", "BEAKER_ASSIGNED_CPU_COUNT": "124", "BEAKER_ASSIGNED_GPU_COUNT": "4", "BEAKER_NODE_HOSTNAME": "neptune-cs-aus-262.reviz.ai2.in"}}
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"task_name": "gsm8k", "task_hash": "9ba55955eb449c76169c4ae523e36bd0", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "gsm8k", "task_core": "gsm8k", "limit": null, "split": "test", "num_shots": 8, "fewshot_seed": 1234, "primary_metric": "bits_per_byte", "random_subsample_seed": 1234, "context_kwargs": {"no_cot": false}, "generation_kwargs": {"max_gen_toks": 512, "do_sample": false, "temperature": 0.0, "stop_sequences": ["Question:", "</s>", "<|im_end|>", "\n\n"], "repeats": 1}, "metric_kwargs": {"regexes_to_ignore": [",", "\\$", "(?s).*#### ", "\\.$"]}, "native_id_field": "id", "fewshot_source": "STD:GSM8k", "dataset_path": "gsm8k", "dataset_name": "main", "use_chat_format": null, "version": 0.1, "revision": null, "compute_gold_bpb": true, "metadata": {"alias": "gsm8k::bpb"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 2.7949564456939697, "current_date": "2025-03-08 02:22:55 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01J8QFWGN4DC8VC8NEF9XT1CHF", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JNRVCYXMHRVVX4W9ED2EPQMT", "BEAKER_WORKLOAD_ID": "01JNRVCYXMK772PE71J0EWXTS5", "BEAKER_ENVIRONMENT_ID": "01JNRVCYXMK772PE71J0EWXTS5", "BEAKER_ASSIGNED_CPU_COUNT": "124", "BEAKER_ASSIGNED_GPU_COUNT": "4", "BEAKER_NODE_HOSTNAME": "neptune-cs-aus-262.reviz.ai2.in"}}
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"task_name": "minerva_math_algebra", "task_hash": "21fe13421d3babf42aca3b20231444bb", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "minerva_math_algebra", "task_core": "minerva_math_algebra", "limit": null, "split": "test", "num_shots": 4, "fewshot_seed": 1234, "primary_metric": "exact_match", "random_subsample_seed": 1234, "context_kwargs": {"use_cot": true, "cot_style": "minerva"}, "generation_kwargs": {"max_gen_toks": 1024, "temperature": 0.0, "do_sample": false, "stop_sequences": ["Problem:", "\n\n"]}, "metric_kwargs": {}, "native_id_field": "index", "fewshot_source": "Minerva:MATH:fixed", "dataset_path": "EleutherAI/hendrycks_math", "dataset_name": "algebra", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "metadata": {"alias": "minerva_math_algebra::bpb"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 1.063769817352295, "current_date": "2025-03-08 02:22:58 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01J8QFWGN4DC8VC8NEF9XT1CHF", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JNRVCYXMHRVVX4W9ED2EPQMT", "BEAKER_WORKLOAD_ID": "01JNRVCYXMK772PE71J0EWXTS5", "BEAKER_ENVIRONMENT_ID": "01JNRVCYXMK772PE71J0EWXTS5", "BEAKER_ASSIGNED_CPU_COUNT": "124", "BEAKER_ASSIGNED_GPU_COUNT": "4", "BEAKER_NODE_HOSTNAME": "neptune-cs-aus-262.reviz.ai2.in"}}
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"task_name": "minerva_math_counting_and_probability", "task_hash": "15f08a35dec3ab28a4afc59f65a26541", "model_hash": "99914b932bd37a50b983c5e7c90ae93b", "model_config": {"model": null, "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "minerva_math_counting_and_probability", "task_core": "minerva_math_counting_and_probability", "limit": null, "split": "test", "num_shots": 4, "fewshot_seed": 1234, "primary_metric": "exact_match", "random_subsample_seed": 1234, "context_kwargs": {"use_cot": true, "cot_style": "minerva"}, "generation_kwargs": {"max_gen_toks": 1024, "temperature": 0.0, "do_sample": false, "stop_sequences": ["Problem:", "\n\n"]}, "metric_kwargs": {}, "native_id_field": "index", "fewshot_source": "Minerva:MATH:fixed", "dataset_path": "EleutherAI/hendrycks_math", "dataset_name": "counting_and_probability", "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": true, "metadata": {"alias": "minerva_math_counting_and_probability::bpb"}}, "compute_config": {"batch_size": "4", "max_batch_size": 32, "output_dir": "workspace", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "autofetch_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 0.7944858074188232, "current_date": "2025-03-08 02:22:59 UTC", "num_instances": 0, "beaker_info": {"BEAKER_NODE_ID": "01J8QFWGN4DC8VC8NEF9XT1CHF", "BEAKER_JOB_KIND": "session", "BEAKER_JOB_ID": "01JNRVCYXMHRVVX4W9ED2EPQMT", "BEAKER_WORKLOAD_ID": "01JNRVCYXMK772PE71J0EWXTS5", "BEAKER_ENVIRONMENT_ID": "01JNRVCYXMK772PE71J0EWXTS5", "BEAKER_ASSIGNED_CPU_COUNT": "124", "BEAKER_ASSIGNED_GPU_COUNT": "4", "BEAKER_NODE_HOSTNAME": "neptune-cs-aus-262.reviz.ai2.in"}}

0 commit comments

Comments
 (0)