diff --git a/evals/registry/data/consensus_summary/samples.jsonl b/evals/registry/data/consensus_summary/samples.jsonl new file mode 100644 index 0000000000..7605841b3e --- /dev/null +++ b/evals/registry/data/consensus_summary/samples.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3573fca21526acb8e65cba1e6e88ae6877856454ab224dda4bd04828527c1299 +size 495294 diff --git a/evals/registry/evals/consensus_summary.yaml b/evals/registry/evals/consensus_summary.yaml new file mode 100644 index 0000000000..26745469a0 --- /dev/null +++ b/evals/registry/evals/consensus_summary.yaml @@ -0,0 +1,11 @@ +consensus_summary: + id: consensus_summary.dev.v0 + description: Evaluate the model's ability to produce a consensus summary in response to a scientific inquiry. + metrics: [accuracy] + +consensus_summary.dev.v0: + class: evals.elsuite.modelgraded.classify:ModelBasedClassify + args: + samples_jsonl: consensus_summary/samples.jsonl + eval_type: cot_classify + modelgraded_spec: fact