diff --git a/evals/registry/data/2d_movement/samples.jsonl b/evals/registry/data/2d_movement/samples.jsonl
new file mode 100644
index 0000000000..56f76a1b41
--- /dev/null
+++ b/evals/registry/data/2d_movement/samples.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a599e52030f1dd1bf110785c54ed27a297546412974b8dbe1e88a50250d007e7
+size 60724
diff --git a/evals/registry/evals/2d_movement.yaml b/evals/registry/evals/2d_movement.yaml
new file mode 100644
index 0000000000..fa978e8306
--- /dev/null
+++ b/evals/registry/evals/2d_movement.yaml
@@ -0,0 +1,9 @@
+2d_movement:
+  id: 2d_movement.dev.v0
+  description: Test the model's ability to keep track of position and orientation in a 2D environment.
+  metrics: [accuracy]
+
+2d_movement.dev.v0:
+  class: evals.elsuite.basic.includes:Includes
+  args:
+    samples_jsonl: 2d_movement/samples.jsonl