diff --git a/evals/registry/data/raven-matrices/symbolic-open/center_single.jsonl b/evals/registry/data/raven-matrices/symbolic-open/center_single.jsonl new file mode 100644 index 0000000000..c377aa0c09 --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic-open/center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c84632d5b8e597ff50a4e9a959bfc6df2cab42798aa1b955978cd11666134a2b +size 1759999 diff --git a/evals/registry/data/raven-matrices/symbolic-open/distribute_four.jsonl b/evals/registry/data/raven-matrices/symbolic-open/distribute_four.jsonl new file mode 100644 index 0000000000..771248a7cf --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic-open/distribute_four.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e8109a535682ee2c04ea01b4bed7fe21de7d0b709f00fb81c8fc4af8b8b1f3 +size 2219071 diff --git a/evals/registry/data/raven-matrices/symbolic-open/distribute_nine.jsonl b/evals/registry/data/raven-matrices/symbolic-open/distribute_nine.jsonl new file mode 100644 index 0000000000..8f07b4c032 --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic-open/distribute_nine.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:948b2b5ac3ad0e75696bd5ebd17ae5c42dbdc617cf5e6fc12601246a881171cc +size 2996485 diff --git a/evals/registry/data/raven-matrices/symbolic-open/in_center_single_out_center_single.jsonl b/evals/registry/data/raven-matrices/symbolic-open/in_center_single_out_center_single.jsonl new file mode 100644 index 0000000000..8f218176ea --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic-open/in_center_single_out_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4860028a0c3a6e4fd886c81d31b95899d89ec426e37a6273e78e39bd126899ec +size 2695999 diff --git a/evals/registry/data/raven-matrices/symbolic-open/in_distribute_four_out_center_single.jsonl 
b/evals/registry/data/raven-matrices/symbolic-open/in_distribute_four_out_center_single.jsonl new file mode 100644 index 0000000000..936c86bec7 --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic-open/in_distribute_four_out_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31998c9f1371fe10d32edf1f714b3b521918111ca5fb38848887c6e370b57523 +size 3148789 diff --git a/evals/registry/data/raven-matrices/symbolic-open/left_center_single_right_center_single.jsonl b/evals/registry/data/raven-matrices/symbolic-open/left_center_single_right_center_single.jsonl new file mode 100644 index 0000000000..f5bef6e1c8 --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic-open/left_center_single_right_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe84cbc56576c987fdcb4b054b2e90ad4f712196ff42f1cd946d9e25986f7af +size 2083999 diff --git a/evals/registry/data/raven-matrices/symbolic-open/up_center_single_down_center_single.jsonl b/evals/registry/data/raven-matrices/symbolic-open/up_center_single_down_center_single.jsonl new file mode 100644 index 0000000000..f681529b75 --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic-open/up_center_single_down_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d69873a1ed555b00a4bf0b78ac0507381e778763d4ceec93c1b8a7abd12ab8 +size 2389999 diff --git a/evals/registry/data/raven-matrices/symbolic/center_single.jsonl b/evals/registry/data/raven-matrices/symbolic/center_single.jsonl new file mode 100644 index 0000000000..be1ead02f8 --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic/center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0b7f3af9212d611e1fd86978218d2fc800407c9c86b2eb6a6271c4c7dd83abd +size 2741999 diff --git a/evals/registry/data/raven-matrices/symbolic/distribute_four.jsonl 
b/evals/registry/data/raven-matrices/symbolic/distribute_four.jsonl new file mode 100644 index 0000000000..dd33ab6334 --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic/distribute_four.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f8126d64f49fe285a6143d7e04d5400e4aedd17091134cc837ff329f9fd0fa4 +size 3577943 diff --git a/evals/registry/data/raven-matrices/symbolic/distribute_nine.jsonl b/evals/registry/data/raven-matrices/symbolic/distribute_nine.jsonl new file mode 100644 index 0000000000..76626614c1 --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic/distribute_nine.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a6e5c6fedbd921c4b35c1bea277567de2b9fc43e3554892ae3a0501cfc5dbd8 +size 4949952 diff --git a/evals/registry/data/raven-matrices/symbolic/in_center_single_out_center_single.jsonl b/evals/registry/data/raven-matrices/symbolic/in_center_single_out_center_single.jsonl new file mode 100644 index 0000000000..e3e55987de --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic/in_center_single_out_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27584db3e1ef723a046254478cd99fe91f19b19d4480a6b487a5167a9978631b +size 4405999 diff --git a/evals/registry/data/raven-matrices/symbolic/in_distribute_four_out_center_single.jsonl b/evals/registry/data/raven-matrices/symbolic/in_distribute_four_out_center_single.jsonl new file mode 100644 index 0000000000..aa95d85eca --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic/in_distribute_four_out_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36161d7f018dfadbf5dc05c8674dbc69160284e37f7305cb0df93e402a768dfd +size 5234761 diff --git a/evals/registry/data/raven-matrices/symbolic/left_center_single_right_center_single.jsonl b/evals/registry/data/raven-matrices/symbolic/left_center_single_right_center_single.jsonl new file mode 100644 index 
0000000000..30b622ddba --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic/left_center_single_right_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aae3eb7ec4925d74899625f9cc09c284051cf57729b34d77c8407d09f6652c6 +size 3317999 diff --git a/evals/registry/data/raven-matrices/symbolic/up_center_single_down_center_single.jsonl b/evals/registry/data/raven-matrices/symbolic/up_center_single_down_center_single.jsonl new file mode 100644 index 0000000000..b7e140e66f --- /dev/null +++ b/evals/registry/data/raven-matrices/symbolic/up_center_single_down_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:359a45f8d54f3636c6aa8e321092d058c21e9e24a7796975f8e328209099bbc8 +size 3861999 diff --git a/evals/registry/data/raven-matrices/text-open/center_single.jsonl b/evals/registry/data/raven-matrices/text-open/center_single.jsonl new file mode 100644 index 0000000000..dd5cddfa9d --- /dev/null +++ b/evals/registry/data/raven-matrices/text-open/center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa6cf665aea6826ef334d291c340e94a14a39902b51e5b519e29fdccb0d1c92 +size 2520552 diff --git a/evals/registry/data/raven-matrices/text-open/distribute_four.jsonl b/evals/registry/data/raven-matrices/text-open/distribute_four.jsonl new file mode 100644 index 0000000000..3c302fbec0 --- /dev/null +++ b/evals/registry/data/raven-matrices/text-open/distribute_four.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd83b30b786296fd4b6af335ea70d349990bf05d46e44ccc6d6c05971a57d56a +size 4267743 diff --git a/evals/registry/data/raven-matrices/text-open/distribute_nine.jsonl b/evals/registry/data/raven-matrices/text-open/distribute_nine.jsonl new file mode 100644 index 0000000000..3c14b160aa --- /dev/null +++ b/evals/registry/data/raven-matrices/text-open/distribute_nine.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2756be45b7196385765358fe3f1c889c0ec1d4bafd5c8070b19d99dedfe6fc85 +size 7050510 diff --git a/evals/registry/data/raven-matrices/text-open/in_center_single_out_center_single.jsonl b/evals/registry/data/raven-matrices/text-open/in_center_single_out_center_single.jsonl new file mode 100644 index 0000000000..528a063808 --- /dev/null +++ b/evals/registry/data/raven-matrices/text-open/in_center_single_out_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67aa54a0ce12553e950b8e7945253ccfe2e3cc5f2f57519f0966dd470ba4c822 +size 4488970 diff --git a/evals/registry/data/raven-matrices/text-open/in_distribute_four_out_center_single.jsonl b/evals/registry/data/raven-matrices/text-open/in_distribute_four_out_center_single.jsonl new file mode 100644 index 0000000000..2b38a6f1e4 --- /dev/null +++ b/evals/registry/data/raven-matrices/text-open/in_distribute_four_out_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb00aa4309f26fa7c0c98d582d8d5bf2a5d3d0057ad8f3f61efe69bb22e79419 +size 6227228 diff --git a/evals/registry/data/raven-matrices/text-open/left_center_single_right_center_single.jsonl b/evals/registry/data/raven-matrices/text-open/left_center_single_right_center_single.jsonl new file mode 100644 index 0000000000..1d3423c1b9 --- /dev/null +++ b/evals/registry/data/raven-matrices/text-open/left_center_single_right_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247f33318e4eb23375ad42e5ce48073250b1334c633a449e6311ea97fb51db90 +size 5261071 diff --git a/evals/registry/data/raven-matrices/text-open/up_center_single_down_center_single.jsonl b/evals/registry/data/raven-matrices/text-open/up_center_single_down_center_single.jsonl new file mode 100644 index 0000000000..6d941adb0b --- /dev/null +++ b/evals/registry/data/raven-matrices/text-open/up_center_single_down_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d655b5666be54ddd7f79ec6d5a68aab1285d8c65ae27f6c8fe5da11c1af13834 +size 4793085 diff --git a/evals/registry/data/raven-matrices/text/center_single.jsonl b/evals/registry/data/raven-matrices/text/center_single.jsonl new file mode 100644 index 0000000000..9a4752b90e --- /dev/null +++ b/evals/registry/data/raven-matrices/text/center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9857cf346a58672fae6f3b3f4a9fe08f870c5316b3b0cc085f2758aebea29ac0 +size 4092815 diff --git a/evals/registry/data/raven-matrices/text/distribute_four.jsonl b/evals/registry/data/raven-matrices/text/distribute_four.jsonl new file mode 100644 index 0000000000..013e40c2ff --- /dev/null +++ b/evals/registry/data/raven-matrices/text/distribute_four.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:764fbfa274e00fcdc116f5bb492ae3cbb57885210200b1c00dd4065ef5ce0424 +size 7268404 diff --git a/evals/registry/data/raven-matrices/text/distribute_nine.jsonl b/evals/registry/data/raven-matrices/text/distribute_nine.jsonl new file mode 100644 index 0000000000..be70d64640 --- /dev/null +++ b/evals/registry/data/raven-matrices/text/distribute_nine.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7899145b23ad2f146ea37c8f6f536790f79812470676ce52b7c9231da27f872 +size 12176681 diff --git a/evals/registry/data/raven-matrices/text/in_center_single_out_center_single.jsonl b/evals/registry/data/raven-matrices/text/in_center_single_out_center_single.jsonl new file mode 100644 index 0000000000..56acfbb588 --- /dev/null +++ b/evals/registry/data/raven-matrices/text/in_center_single_out_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e04ce455c69f07c797c865878b7934db9bdabe76ec5ef5a8623453dce4ff1b59 +size 7591904 diff --git a/evals/registry/data/raven-matrices/text/in_distribute_four_out_center_single.jsonl 
b/evals/registry/data/raven-matrices/text/in_distribute_four_out_center_single.jsonl new file mode 100644 index 0000000000..769471878e --- /dev/null +++ b/evals/registry/data/raven-matrices/text/in_distribute_four_out_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b8f96d41af58476d2ebb319e4ddc4cd16b924f1b5557fe9844cd8f00392233 +size 10763994 diff --git a/evals/registry/data/raven-matrices/text/left_center_single_right_center_single.jsonl b/evals/registry/data/raven-matrices/text/left_center_single_right_center_single.jsonl new file mode 100644 index 0000000000..087c3f4b65 --- /dev/null +++ b/evals/registry/data/raven-matrices/text/left_center_single_right_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c46735a144951057bca8e1bee6da0902adf0f474afb761319c0c940aabfd1d6 +size 8962698 diff --git a/evals/registry/data/raven-matrices/text/up_center_single_down_center_single.jsonl b/evals/registry/data/raven-matrices/text/up_center_single_down_center_single.jsonl new file mode 100644 index 0000000000..c8e07662b5 --- /dev/null +++ b/evals/registry/data/raven-matrices/text/up_center_single_down_center_single.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a1c728c624279ce250dab44db0fe0815a9f88c557f66fc92629746eae3b346 +size 8130573 diff --git a/evals/registry/eval_sets/raven-matrices.yaml b/evals/registry/eval_sets/raven-matrices.yaml new file mode 100644 index 0000000000..841024e784 --- /dev/null +++ b/evals/registry/eval_sets/raven-matrices.yaml @@ -0,0 +1,31 @@ +raven-matrices: + evals: + - raven-matrices-symbolic-center-single + - raven-matrices-symbolic-distribute-four + - raven-matrices-symbolic-distribute-nine + - raven-matrices-symbolic-in-center-single-out-center-single + - raven-matrices-symbolic-in-distribute-four-out-center-single + - raven-matrices-symbolic-left-center-single-right-center-single + - 
raven-matrices-symbolic-up-center-single-down-center-single + - raven-matrices-symbolic-open-center-single + - raven-matrices-symbolic-open-distribute-four + - raven-matrices-symbolic-open-distribute-nine + - raven-matrices-symbolic-open-in-center-single-out-center-single + - raven-matrices-symbolic-open-in-distribute-four-out-center-single + - raven-matrices-symbolic-open-left-center-single-right-center-single + - raven-matrices-symbolic-open-up-center-single-down-center-single + - raven-matrices-text-center-single + - raven-matrices-text-distribute-four + - raven-matrices-text-distribute-nine + - raven-matrices-text-in-center-single-out-center-single + - raven-matrices-text-in-distribute-four-out-center-single + - raven-matrices-text-left-center-single-right-center-single + - raven-matrices-text-up-center-single-down-center-single + - raven-matrices-text-open-center-single + - raven-matrices-text-open-distribute-four + - raven-matrices-text-open-distribute-nine + - raven-matrices-text-open-in-center-single-out-center-single + - raven-matrices-text-open-in-distribute-four-out-center-single + - raven-matrices-text-open-left-center-single-right-center-single + - raven-matrices-text-open-up-center-single-down-center-single + diff --git a/evals/registry/evals/raven-matrices.yaml b/evals/registry/evals/raven-matrices.yaml new file mode 100644 index 0000000000..9f06778485 --- /dev/null +++ b/evals/registry/evals/raven-matrices.yaml @@ -0,0 +1,313 @@ +raven-matrices-symbolic-center-single: + id: raven-matrices-symbolic-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test. Matrices composed of a single centered object. 
+ metrics: [accuracy] + +raven-matrices-symbolic-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic/center_single.jsonl + + +raven-matrices-symbolic-distribute-four: + id: raven-matrices-symbolic-distribute-four.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test. Matrices composed of four objects. + metrics: [accuracy] + +raven-matrices-symbolic-distribute-four.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic/distribute_four.jsonl + + +raven-matrices-symbolic-distribute-nine: + id: raven-matrices-symbolic-distribute-nine.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test. Matrices composed of nine objects. + metrics: [accuracy] + +raven-matrices-symbolic-distribute-nine.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic/distribute_nine.jsonl + + +raven-matrices-symbolic-in-center-single-out-center-single: + id: raven-matrices-symbolic-in-center-single-out-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test. Matrices composed of a small object inside a big object. + metrics: [accuracy] + +raven-matrices-symbolic-in-center-single-out-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic/in_center_single_out_center_single.jsonl + + +raven-matrices-symbolic-in-distribute-four-out-center-single: + id: raven-matrices-symbolic-in-distribute-four-out-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test. Matrices composed of four small objects inside a big object.
+ metrics: [accuracy] + +raven-matrices-symbolic-in-distribute-four-out-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic/in_distribute_four_out_center_single.jsonl + + +raven-matrices-symbolic-left-center-single-right-center-single: + id: raven-matrices-symbolic-left-center-single-right-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test. Matrices composed of two objects aligned horizontally. + metrics: [accuracy] + +raven-matrices-symbolic-left-center-single-right-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic/left_center_single_right_center_single.jsonl + + +raven-matrices-symbolic-up-center-single-down-center-single: + id: raven-matrices-symbolic-up-center-single-down-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test. Matrices composed of two objects aligned vertically. + metrics: [accuracy] + +raven-matrices-symbolic-up-center-single-down-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic/up_center_single_down_center_single.jsonl + + + + +raven-matrices-symbolic-open-center-single: + id: raven-matrices-symbolic-open-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test with no multiple choices provided. Matrices composed of a single centered object. 
+ metrics: [accuracy] + +raven-matrices-symbolic-open-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic-open/center_single.jsonl + + +raven-matrices-symbolic-open-distribute-four: + id: raven-matrices-symbolic-open-distribute-four.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test with no multiple choices provided. Matrices composed of four objects. + metrics: [accuracy] + +raven-matrices-symbolic-open-distribute-four.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic-open/distribute_four.jsonl + + +raven-matrices-symbolic-open-distribute-nine: + id: raven-matrices-symbolic-open-distribute-nine.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test with no multiple choices provided. Matrices composed of nine objects. + metrics: [accuracy] + +raven-matrices-symbolic-open-distribute-nine.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic-open/distribute_nine.jsonl + + +raven-matrices-symbolic-open-in-center-single-out-center-single: + id: raven-matrices-symbolic-open-in-center-single-out-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test with no multiple choices provided. Matrices composed of a small object inside a big object.
+ metrics: [accuracy] + +raven-matrices-symbolic-open-in-center-single-out-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic-open/in_center_single_out_center_single.jsonl + + +raven-matrices-symbolic-open-in-distribute-four-out-center-single: + id: raven-matrices-symbolic-open-in-distribute-four-out-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test with no multiple choices provided. Matrices composed of four small objects inside a big object. + metrics: [accuracy] + +raven-matrices-symbolic-open-in-distribute-four-out-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic-open/in_distribute_four_out_center_single.jsonl + + +raven-matrices-symbolic-open-left-center-single-right-center-single: + id: raven-matrices-symbolic-open-left-center-single-right-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test with no multiple choices provided. Matrices composed of two objects aligned horizontally. + metrics: [accuracy] + +raven-matrices-symbolic-open-left-center-single-right-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic-open/left_center_single_right_center_single.jsonl + + +raven-matrices-symbolic-open-up-center-single-down-center-single: + id: raven-matrices-symbolic-open-up-center-single-down-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a symbolic version of the Raven test with no multiple choices provided. Matrices composed of two objects aligned vertically. 
+ metrics: [accuracy] + +raven-matrices-symbolic-open-up-center-single-down-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/symbolic-open/up_center_single_down_center_single.jsonl + + + +raven-matrices-text-center-single: + id: raven-matrices-text-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test. Matrices composed of a single centered object. + metrics: [accuracy] + +raven-matrices-text-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text/center_single.jsonl + + +raven-matrices-text-distribute-four: + id: raven-matrices-text-distribute-four.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test. Matrices composed of four objects. + metrics: [accuracy] + +raven-matrices-text-distribute-four.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text/distribute_four.jsonl + + +raven-matrices-text-distribute-nine: + id: raven-matrices-text-distribute-nine.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test. Matrices composed of nine objects. + metrics: [accuracy] + +raven-matrices-text-distribute-nine.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text/distribute_nine.jsonl + + +raven-matrices-text-in-center-single-out-center-single: + id: raven-matrices-text-in-center-single-out-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test. Matrices composed of a small object inside a big object.
+ metrics: [accuracy] + +raven-matrices-text-in-center-single-out-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text/in_center_single_out_center_single.jsonl + + +raven-matrices-text-in-distribute-four-out-center-single: + id: raven-matrices-text-in-distribute-four-out-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test. Matrices composed of four small objects inside a big object. + metrics: [accuracy] + +raven-matrices-text-in-distribute-four-out-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text/in_distribute_four_out_center_single.jsonl + + +raven-matrices-text-left-center-single-right-center-single: + id: raven-matrices-text-left-center-single-right-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test. Matrices composed of two objects aligned horizontally. + metrics: [accuracy] + +raven-matrices-text-left-center-single-right-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text/left_center_single_right_center_single.jsonl + + +raven-matrices-text-up-center-single-down-center-single: + id: raven-matrices-text-up-center-single-down-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test. Matrices composed of two objects aligned vertically. 
+ metrics: [accuracy] + +raven-matrices-text-up-center-single-down-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text/up_center_single_down_center_single.jsonl + + + + +raven-matrices-text-open-center-single: + id: raven-matrices-text-open-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test with no multiple choices provided. Matrices composed of a single centered object. + metrics: [accuracy] + +raven-matrices-text-open-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text-open/center_single.jsonl + + +raven-matrices-text-open-distribute-four: + id: raven-matrices-text-open-distribute-four.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test with no multiple choices provided. Matrices composed of four objects. + metrics: [accuracy] + +raven-matrices-text-open-distribute-four.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text-open/distribute_four.jsonl + + +raven-matrices-text-open-distribute-nine: + id: raven-matrices-text-open-distribute-nine.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test with no multiple choices provided. Matrices composed of nine objects. + metrics: [accuracy] + +raven-matrices-text-open-distribute-nine.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text-open/distribute_nine.jsonl + + +raven-matrices-text-open-in-center-single-out-center-single: + id: raven-matrices-text-open-in-center-single-out-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test with no multiple choices provided. Matrices composed of a small object inside a big object.
+ metrics: [accuracy] + +raven-matrices-text-open-in-center-single-out-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text-open/in_center_single_out_center_single.jsonl + + +raven-matrices-text-open-in-distribute-four-out-center-single: + id: raven-matrices-text-open-in-distribute-four-out-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test with no multiple choices provided. Matrices composed of four small objects inside a big object. + metrics: [accuracy] + +raven-matrices-text-open-in-distribute-four-out-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text-open/in_distribute_four_out_center_single.jsonl + + +raven-matrices-text-open-left-center-single-right-center-single: + id: raven-matrices-text-open-left-center-single-right-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test with no multiple choices provided. Matrices composed of two objects aligned horizontally. + metrics: [accuracy] + +raven-matrices-text-open-left-center-single-right-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text-open/left_center_single_right_center_single.jsonl + + +raven-matrices-text-open-up-center-single-down-center-single: + id: raven-matrices-text-open-up-center-single-down-center-single.dev.v0 + description: Evaluate the abstract reasoning abilities of the model on a text version of the Raven test with no multiple choices provided. Matrices composed of two objects aligned vertically. + metrics: [accuracy] + +raven-matrices-text-open-up-center-single-down-center-single.dev.v0: + class: evals.elsuite.basic.includes:Includes + args: + samples_jsonl: raven-matrices/text-open/up_center_single_down_center_single.jsonl + +