Skip to content

Commit 25a617c

Browse files
committed
Merge branch 'main' into dataset_infos-in-yaml
2 parents a53cc05 + a897564 commit 25a617c

File tree

27 files changed

+792
-212
lines changed

27 files changed

+792
-212
lines changed

README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
<a href="https://zenodo.org/badge/latestdoi/250213286"><img src="https://zenodo.org/badge/250213286.svg" alt="DOI"></a>
2626
</p>
2727

28-
** ⚠️ The "master" branch has been renamed "main", please update your forks with [these instructions](https://github.com/huggingface/datasets/issues/4629)**
29-
3028
🤗 Datasets is a lightweight library providing **two** main features:
3129

3230
- **one-line dataloaders for many public datasets**: one-liners to download and pre-process any of the ![number of datasets](https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/datasets&color=brightgreen) major public datasets (text datasets in 467 languages and dialects, image datasets, audio datasets, etc.) provided on the [HuggingFace Datasets Hub](https://huggingface.co/datasets). With a simple command like `squad_dataset = load_dataset("squad")`, get any of these datasets ready to use in a dataloader for training/evaluating a ML model (Numpy/Pandas/PyTorch/TensorFlow/JAX),

datasets/adv_glue/dataset_infos.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

datasets/compguesswhat/README.md

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,24 @@
11
---
2-
paperswithcode_id: compguesswhat
2+
annotations_creators:
3+
- machine-generated
4+
language:
5+
- en
6+
language_creators:
7+
- found
8+
license:
9+
- unknown
10+
multilinguality:
11+
- monolingual
312
pretty_name: CompGuessWhat?!
13+
size_categories:
14+
- 100K<n<1M
15+
source_datasets:
16+
- extended|other-guesswhat
17+
task_categories:
18+
- visual-question-answering
19+
task_ids:
20+
- visual-question-answering
21+
paperswithcode_id: compguesswhat
422
---
523

624
# Dataset Card for "compguesswhat"

datasets/compguesswhat/compguesswhat.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class Compguesswhat(datasets.GeneratorBasedBuilder):
5050
name="compguesswhat-original",
5151
gameplay_scenario="original",
5252
description="CompGuessWhat?! subset of games from the original GuessWhat?! dataset",
53-
data_url="https://www.dropbox.com/s/l0nc13udml6vs0w/compguesswhat-original.zip?dl=1",
53+
data_url="https://www.dropbox.com/s/qd9wlydpkpmq8rr/compguesswhat-original.zip?dl=1",
5454
splits={
5555
"train": "compguesswhat.train.jsonl.gz",
5656
"valid": "compguesswhat.valid.jsonl.gz",
@@ -61,7 +61,7 @@ class Compguesswhat(datasets.GeneratorBasedBuilder):
6161
name="compguesswhat-zero_shot",
6262
gameplay_scenario="zero_shot",
6363
description="CompGuessWhat?! reference set of games for zero-shot evaluation using NOCAPS images",
64-
data_url="https://www.dropbox.com/s/gd46azul7o7iip4/compguesswhat-zero_shot.zip?dl=1",
64+
data_url="https://www.dropbox.com/s/f5o2t7aiok7kpcm/compguesswhat-zero_shot.zip?dl=1",
6565
splits={
6666
"nd_valid": "compguesswhat.nd_valid.jsonl.gz",
6767
"nd_test": "compguesswhat.nd_test.jsonl.gz",
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"compguesswhat-original": {"description": "\n CompGuessWhat?! is an instance of a multi-task framework for evaluating the quality of learned neural representations,\n in particular concerning attribute grounding. Use this dataset if you want to use the set of games whose reference\n scene is an image in VisualGenome. Visit the website for more details: https://compguesswhat.github.io\n ", "citation": " @inproceedings{suglia2020compguesswhat,\n title={CompGuessWhat?!: a Multi-task Evaluation Framework for Grounded Language Learning},\n author={Suglia, Alessandro, Konstas, Ioannis, Vanzo, Andrea, Bastianelli, Emanuele, Desmond Elliott, Stella Frank and Oliver Lemon},\n booktitle={Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},\n year={2020}\n }\n ", "homepage": "https://compguesswhat.github.io/", "license": "", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "target_id": {"dtype": "int32", "id": null, "_type": "Value"}, "timestamp": {"dtype": "string", "id": null, "_type": "Value"}, "status": {"dtype": "string", "id": null, "_type": "Value"}, "image": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "file_name": {"dtype": "string", "id": null, "_type": "Value"}, "flickr_url": {"dtype": "string", "id": null, "_type": "Value"}, "coco_url": {"dtype": "string", "id": null, "_type": "Value"}, "height": {"dtype": "int32", "id": null, "_type": "Value"}, "width": {"dtype": "int32", "id": null, "_type": "Value"}, "visual_genome": {"width": {"dtype": "int32", "id": null, "_type": "Value"}, "height": {"dtype": "int32", "id": null, "_type": "Value"}, "url": {"dtype": "string", "id": null, "_type": "Value"}, "coco_id": {"dtype": "int32", "id": null, "_type": "Value"}, "flickr_id": {"dtype": "string", "id": null, "_type": "Value"}, "image_id": {"dtype": "string", "id": null, "_type": "Value"}}}, "qas": {"feature": {"question": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "int32", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "objects": {"feature": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "bbox": {"feature": {"dtype": "float32", "id": null, "_type": "Value"}, "length": 4, "id": null, "_type": "Sequence"}, "category": {"dtype": "string", "id": null, "_type": "Value"}, "area": {"dtype": "float32", "id": null, "_type": "Value"}, "category_id": {"dtype": "int32", "id": null, "_type": "Value"}, "segment": {"feature": {"feature": {"dtype": "float32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "length": -1, "id": null, "_type": "Sequence"}}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "builder_name": "compguesswhat", "config_name": "compguesswhat-original", "version": {"version_str": "0.2.0", "description": "Second CompGuessWhat?! release", "major": 0, "minor": 2, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 123557020, "num_examples": 46341, "dataset_name": "compguesswhat"}, "validation": {"name": "validation", "num_bytes": 25441516, "num_examples": 9738, "dataset_name": "compguesswhat"}, "test": {"name": "test", "num_bytes": 25369315, "num_examples": 9621, "dataset_name": "compguesswhat"}}, "download_checksums": {"https://www.dropbox.com/s/l0nc13udml6vs0w/compguesswhat-original.zip?dl=1": {"num_bytes": 107201655, "checksum": "f965b469142cf65635b2b8b309ba2b28539fd7ad40039b11856cd662f2b000c1"}}, "download_size": 107201655, "post_processing_size": null, "dataset_size": 174367851, "size_in_bytes": 281569506}, "compguesswhat-zero_shot": {"description": "\n CompGuessWhat?! is an instance of a multi-task framework for evaluating the quality of learned neural representations,\n in particular concerning attribute grounding. Use this dataset if you want to use the set of games whose reference\n scene is an image in VisualGenome. Visit the website for more details: https://compguesswhat.github.io\n ", "citation": " @inproceedings{suglia2020compguesswhat,\n title={CompGuessWhat?!: a Multi-task Evaluation Framework for Grounded Language Learning},\n author={Suglia, Alessandro, Konstas, Ioannis, Vanzo, Andrea, Bastianelli, Emanuele, Desmond Elliott, Stella Frank and Oliver Lemon},\n booktitle={Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},\n year={2020}\n }\n ", "homepage": "https://compguesswhat.github.io/", "license": "", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "target_id": {"dtype": "string", "id": null, "_type": "Value"}, "status": {"dtype": "string", "id": null, "_type": "Value"}, "image": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "file_name": {"dtype": "string", "id": null, "_type": "Value"}, "coco_url": {"dtype": "string", "id": null, "_type": "Value"}, "height": {"dtype": "int32", "id": null, "_type": "Value"}, "width": {"dtype": "int32", "id": null, "_type": "Value"}, "license": {"dtype": "int32", "id": null, "_type": "Value"}, "open_images_id": {"dtype": "string", "id": null, "_type": "Value"}, "date_captured": {"dtype": "string", "id": null, "_type": "Value"}}, "objects": {"feature": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "bbox": {"feature": {"dtype": "float32", "id": null, "_type": "Value"}, "length": 4, "id": null, "_type": "Sequence"}, "category": {"dtype": "string", "id": null, "_type": "Value"}, "area": {"dtype": "float32", "id": null, "_type": "Value"}, "category_id": {"dtype": "int32", "id": null, "_type": "Value"}, "IsOccluded": {"dtype": "int32", "id": null, "_type": "Value"}, "IsTruncated": {"dtype": "int32", "id": null, "_type": "Value"}, "segment": {"feature": {"MaskPath": {"dtype": "string", "id": null, "_type": "Value"}, "LabelName": {"dtype": "string", "id": null, "_type": "Value"}, "BoxID": {"dtype": "string", "id": null, "_type": "Value"}, "BoxXMin": {"dtype": "string", "id": null, "_type": "Value"}, "BoxXMax": {"dtype": "string", "id": null, "_type": "Value"}, "BoxYMin": {"dtype": "string", "id": null, "_type": "Value"}, "BoxYMax": {"dtype": "string", "id": null, "_type": "Value"}, "PredictedIoU": {"dtype": "string", "id": null, "_type": "Value"}, "Clicks": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "builder_name": "compguesswhat", "config_name": "compguesswhat-zero_shot", "version": {"version_str": "0.2.0", "description": "Second CompGuessWhat?! release", "major": 0, "minor": 2, "patch": 0}, "splits": {"nd_valid": {"name": "nd_valid", "num_bytes": 13510725, "num_examples": 5343, "dataset_name": "compguesswhat"}, "nd_test": {"name": "nd_test", "num_bytes": 36228293, "num_examples": 13836, "dataset_name": "compguesswhat"}, "od_valid": {"name": "od_valid", "num_bytes": 14052108, "num_examples": 5372, "dataset_name": "compguesswhat"}, "od_test": {"name": "od_test", "num_bytes": 32951141, "num_examples": 13300, "dataset_name": "compguesswhat"}}, "download_checksums": {"https://www.dropbox.com/s/gd46azul7o7iip4/compguesswhat-zero_shot.zip?dl=1": {"num_bytes": 4845966, "checksum": "ebf444e14408dbff2d910ba9ec049cd42882f2ce52d465d0f78203ee454aa33e"}}, "download_size": 4845966, "post_processing_size": null, "dataset_size": 96742267, "size_in_bytes": 101588233}}
1+
{"compguesswhat-original": {"description": "\n CompGuessWhat?! is an instance of a multi-task framework for evaluating the quality of learned neural representations,\n in particular concerning attribute grounding. Use this dataset if you want to use the set of games whose reference\n scene is an image in VisualGenome. Visit the website for more details: https://compguesswhat.github.io\n ", "citation": " @inproceedings{suglia2020compguesswhat,\n title={CompGuessWhat?!: a Multi-task Evaluation Framework for Grounded Language Learning},\n author={Suglia, Alessandro, Konstas, Ioannis, Vanzo, Andrea, Bastianelli, Emanuele, Desmond Elliott, Stella Frank and Oliver Lemon},\n booktitle={Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},\n year={2020}\n }\n ", "homepage": "https://compguesswhat.github.io/", "license": "", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "target_id": {"dtype": "int32", "id": null, "_type": "Value"}, "timestamp": {"dtype": "string", "id": null, "_type": "Value"}, "status": {"dtype": "string", "id": null, "_type": "Value"}, "image": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "file_name": {"dtype": "string", "id": null, "_type": "Value"}, "flickr_url": {"dtype": "string", "id": null, "_type": "Value"}, "coco_url": {"dtype": "string", "id": null, "_type": "Value"}, "height": {"dtype": "int32", "id": null, "_type": "Value"}, "width": {"dtype": "int32", "id": null, "_type": "Value"}, "visual_genome": {"width": {"dtype": "int32", "id": null, "_type": "Value"}, "height": {"dtype": "int32", "id": null, "_type": "Value"}, "url": {"dtype": "string", "id": null, "_type": "Value"}, "coco_id": {"dtype": "int32", "id": null, "_type": "Value"}, "flickr_id": {"dtype": "string", "id": null, "_type": "Value"}, "image_id": {"dtype": "string", "id": null, "_type": "Value"}}}, "qas": {"feature": {"question": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "int32", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "objects": {"feature": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "bbox": {"feature": {"dtype": "float32", "id": null, "_type": "Value"}, "length": 4, "id": null, "_type": "Sequence"}, "category": {"dtype": "string", "id": null, "_type": "Value"}, "area": {"dtype": "float32", "id": null, "_type": "Value"}, "category_id": {"dtype": "int32", "id": null, "_type": "Value"}, "segment": {"feature": {"feature": {"dtype": "float32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "length": -1, "id": null, "_type": "Sequence"}}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "compguesswhat", "config_name": "compguesswhat-original", "version": {"version_str": "0.2.0", "description": "Second CompGuessWhat?! release", "major": 0, "minor": 2, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 126548689, "num_examples": 46341, "dataset_name": "compguesswhat"}, "validation": {"name": "validation", "num_bytes": 26055261, "num_examples": 9738, "dataset_name": "compguesswhat"}, "test": {"name": "test", "num_bytes": 25981593, "num_examples": 9621, "dataset_name": "compguesswhat"}}, "download_checksums": {"https://www.dropbox.com/s/qd9wlydpkpmq8rr/compguesswhat-original.zip?dl=1": {"num_bytes": 107201655, "checksum": "f965b469142cf65635b2b8b309ba2b28539fd7ad40039b11856cd662f2b000c1"}}, "download_size": 107201655, "post_processing_size": null, "dataset_size": 178585543, "size_in_bytes": 285787198}, "compguesswhat-zero_shot": {"description": "\n CompGuessWhat?! is an instance of a multi-task framework for evaluating the quality of learned neural representations,\n in particular concerning attribute grounding. Use this dataset if you want to use the set of games whose reference\n scene is an image in VisualGenome. Visit the website for more details: https://compguesswhat.github.io\n ", "citation": " @inproceedings{suglia2020compguesswhat,\n title={CompGuessWhat?!: a Multi-task Evaluation Framework for Grounded Language Learning},\n author={Suglia, Alessandro, Konstas, Ioannis, Vanzo, Andrea, Bastianelli, Emanuele, Desmond Elliott, Stella Frank and Oliver Lemon},\n booktitle={Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},\n year={2020}\n }\n ", "homepage": "https://compguesswhat.github.io/", "license": "", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "target_id": {"dtype": "string", "id": null, "_type": "Value"}, "status": {"dtype": "string", "id": null, "_type": "Value"}, "image": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "file_name": {"dtype": "string", "id": null, "_type": "Value"}, "coco_url": {"dtype": "string", "id": null, "_type": "Value"}, "height": {"dtype": "int32", "id": null, "_type": "Value"}, "width": {"dtype": "int32", "id": null, "_type": "Value"}, "license": {"dtype": "int32", "id": null, "_type": "Value"}, "open_images_id": {"dtype": "string", "id": null, "_type": "Value"}, "date_captured": {"dtype": "string", "id": null, "_type": "Value"}}, "objects": {"feature": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "bbox": {"feature": {"dtype": "float32", "id": null, "_type": "Value"}, "length": 4, "id": null, "_type": "Sequence"}, "category": {"dtype": "string", "id": null, "_type": "Value"}, "area": {"dtype": "float32", "id": null, "_type": "Value"}, "category_id": {"dtype": "int32", "id": null, "_type": "Value"}, "IsOccluded": {"dtype": "int32", "id": null, "_type": "Value"}, "IsTruncated": {"dtype": "int32", "id": null, "_type": "Value"}, "segment": {"feature": {"MaskPath": {"dtype": "string", "id": null, "_type": "Value"}, "LabelName": {"dtype": "string", "id": null, "_type": "Value"}, "BoxID": {"dtype": "string", "id": null, "_type": "Value"}, "BoxXMin": {"dtype": "string", "id": null, "_type": "Value"}, "BoxXMax": {"dtype": "string", "id": null, "_type": "Value"}, "BoxYMin": {"dtype": "string", "id": null, "_type": "Value"}, "BoxYMax": {"dtype": "string", "id": null, "_type": "Value"}, "PredictedIoU": {"dtype": "string", "id": null, "_type": "Value"}, "Clicks": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "compguesswhat", "config_name": "compguesswhat-zero_shot", "version": {"version_str": "0.2.0", "description": "Second CompGuessWhat?! release", "major": 0, "minor": 2, "patch": 0}, "splits": {"nd_valid": {"name": "nd_valid", "num_bytes": 13557059, "num_examples": 5343, "dataset_name": "compguesswhat"}, "nd_test": {"name": "nd_test", "num_bytes": 36352201, "num_examples": 13836, "dataset_name": "compguesswhat"}, "od_valid": {"name": "od_valid", "num_bytes": 14093233, "num_examples": 5372, "dataset_name": "compguesswhat"}, "od_test": {"name": "od_test", "num_bytes": 33049755, "num_examples": 13300, "dataset_name": "compguesswhat"}}, "download_checksums": {"https://www.dropbox.com/s/f5o2t7aiok7kpcm/compguesswhat-zero_shot.zip?dl=1": {"num_bytes": 4845966, "checksum": "ebf444e14408dbff2d910ba9ec049cd42882f2ce52d465d0f78203ee454aa33e"}}, "download_size": 4845966, "post_processing_size": null, "dataset_size": 97052248, "size_in_bytes": 101898214}}

0 commit comments

Comments
 (0)