diff --git a/datasets/pass/README.md b/datasets/pass/README.md index 6e56a1c5a3c..178fc54b7d3 100644 --- a/datasets/pass/README.md +++ b/datasets/pass/README.md @@ -111,7 +111,7 @@ A data point comprises an image and its meta-data: ### Data Splits -All the data is contained in training set. The training set has 1.4M (1,439,719) instances. +All the data is contained in the training set. The training set has 1,439,588 instances as this implementation corresponds to the most recent release (v3) from the [version history](https://github.com/yukimasano/PASS/blob/main/version_history.txt). From the paper: diff --git a/datasets/pass/dataset_infos.json b/datasets/pass/dataset_infos.json index a2adf252b4f..68c779263c4 100644 --- a/datasets/pass/dataset_infos.json +++ b/datasets/pass/dataset_infos.json @@ -1 +1 @@ -{"default": {"description": "PASS (Pictures without humAns for Self-Supervision) is a large-scale dataset of 1,440,191 images that does not include any humans\nand which can be used for high-quality pretraining while significantly reducing privacy concerns.\nThe PASS images are sourced from the YFCC-100M dataset.\n", "citation": "@Article{asano21pass,\nauthor = \"Yuki M. Asano and Christian Rupprecht and Andrew Zisserman and Andrea Vedaldi\",\ntitle = \"PASS: An ImageNet replacement for self-supervised pretraining without humans\",\njournal = \"NeurIPS Track on Datasets and Benchmarks\",\nyear = \"2021\"\n}\n", "homepage": "https://www.robots.ox.ac.uk/~vgg/research/pass/", "license": "Creative Commons Attribution 4.0 International", "features": {"image": {"id": null, "_type": "Image"}, "creator_username": {"dtype": "string", "id": null, "_type": "Value"}, "hash": {"dtype": "string", "id": null, "_type": "Value"}, "gps_latitude": {"dtype": "float32", "id": null, "_type": "Value"}, "gps_longitude": {"dtype": "float32", "id": null, "_type": "Value"}, "date_taken": {"dtype": "timestamp[us]", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "pass", "config_name": "default", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 178578279339, "num_examples": 1439719, "dataset_name": "pass"}}, "download_checksums": {"https://zenodo.org/record/5570664/files/pass_metadata.csv?download=1": {"num_bytes": 151124344, "checksum": "86eeb812aa5fed17eb06f6902c77f695c2a17de489569428833526adc61fe669"}, "https://zenodo.org/record/5570664/files/PASS.0.tar?download=1": {"num_bytes": 18719498240, "checksum": "4f1380dad26a51c8ee4459943b795a71df0b1fe228eec8294c39f84215d8252d"}, "https://zenodo.org/record/5570664/files/PASS.1.tar?download=1": {"num_bytes": 18702233600, "checksum": "f573d0b994224d2e5c8a47a4e16a228d64744ddf235fdae5704eb2843b2c8536"}, "https://zenodo.org/record/5570664/files/PASS.2.tar?download=1": {"num_bytes": 18708899840, "checksum": "f46791122c4e75a77131b56b64eab6aa813de629a963f557c280a626a636fbbd"}, "https://zenodo.org/record/5570664/files/PASS.3.tar?download=1": {"num_bytes": 18705152000, "checksum": "ee761f4792eb3e7160d4aa62cb59e0c4c263f64b4ac1ef621ab16103041200ba"}, "https://zenodo.org/record/5570664/files/PASS.4.tar?download=1": {"num_bytes": 18697226240, "checksum": "e699129fc91e164a51c5e79267f5a288e8eb41979eab22455714ff1e90c9cb63"}, "https://zenodo.org/record/5570664/files/PASS.5.tar?download=1": {"num_bytes": 18690590720, "checksum": "3bb284916640f216c554958030936c9e9930a517496310f0ffbaabde51e01c79"}, "https://zenodo.org/record/5570664/files/PASS.6.tar?download=1": {"num_bytes": 18693263360, "checksum": "cb633e82e9fe9be2b81182fd5bff863f5b0a75373746169d0595be7fcdcc374e"}, "https://zenodo.org/record/5570664/files/PASS.7.tar?download=1": {"num_bytes": 18709043200, "checksum": "236a0815368c339d14aa3634b0a3be11b9638d245200c78271a74b2c753c228e"}, "https://zenodo.org/record/5570664/files/PASS.8.tar?download=1": {"num_bytes": 18702499840, "checksum": "91dec78455e56559cda092935ba01ac6978cea78d24e21e27f2dbc170314cf1a"}, "https://zenodo.org/record/5570664/files/PASS.9.tar?download=1": {"num_bytes": 11174297600, "checksum": "fba7f2414beffa2163b6cec1641b9e434d54abaa1a509086d5bd22c3122537e2"}}, "download_size": 179653828984, "post_processing_size": null, "dataset_size": 178578279339, "size_in_bytes": 358232108323}} \ No newline at end of file +{"default": {"description": "PASS (Pictures without humAns for Self-Supervision) is a large-scale dataset of 1,440,191 images that does not include any humans\nand which can be used for high-quality pretraining while significantly reducing privacy concerns.\nThe PASS images are sourced from the YFCC-100M dataset.\n", "citation": "@Article{asano21pass,\nauthor = \"Yuki M. Asano and Christian Rupprecht and Andrew Zisserman and Andrea Vedaldi\",\ntitle = \"PASS: An ImageNet replacement for self-supervised pretraining without humans\",\njournal = \"NeurIPS Track on Datasets and Benchmarks\",\nyear = \"2021\"\n}\n", "homepage": "https://www.robots.ox.ac.uk/~vgg/research/pass/", "license": "Creative Commons Attribution 4.0 International", "features": {"image": {"decode": true, "id": null, "_type": "Image"}, "creator_username": {"dtype": "string", "id": null, "_type": "Value"}, "hash": {"dtype": "string", "id": null, "_type": "Value"}, "gps_latitude": {"dtype": "float32", "id": null, "_type": "Value"}, "gps_longitude": {"dtype": "float32", "id": null, "_type": "Value"}, "date_taken": {"dtype": "timestamp[us]", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "pass", "config_name": "default", "version": {"version_str": "2.0.0", "description": null, "major": 2, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 178563446100, "num_examples": 1439588, "dataset_name": "pass"}}, "download_checksums": {"https://zenodo.org/record/6615455/files/pass_metadata.csv?download=1": {"num_bytes": 152590171, "checksum": "8b6fde80b48326bda9da0a7c48f92146a58a847f76a1dc40603b7e4f73f5e798"}, "https://zenodo.org/record/6615455/files/PASS.0.tar?download=1": {"num_bytes": 18704343040, "checksum": "5ff59cbf372a05d82c22afa8191eb229959730fd371b84a0ec784e7f91b1d285"}, "https://zenodo.org/record/6615455/files/PASS.1.tar?download=1": {"num_bytes": 18675681280, "checksum": "d7b3a1d82c9695f31e9d8ebb71f450afef39e3353c608971fc74780f59e909fd"}, "https://zenodo.org/record/6615455/files/PASS.2.tar?download=1": {"num_bytes": 18743080960, "checksum": "b62021816caac550580fcace7e74e8cb7358663006a7e28707c602859aa51243"}, "https://zenodo.org/record/6615455/files/PASS.3.tar?download=1": {"num_bytes": 18694615040, "checksum": "2af47eab3a026ca5a747cccf3d6ababbd5a9b028d99d6f76e92a01d2f2b64276"}, "https://zenodo.org/record/6615455/files/PASS.4.tar?download=1": {"num_bytes": 18673643520, "checksum": "0170c73cc846e07fbf2bbece0ea62a893b0d440b937dcac965e5b25f4e75006d"}, "https://zenodo.org/record/6615455/files/PASS.5.tar?download=1": {"num_bytes": 18715811840, "checksum": "79ccc5167fa0e7950fc6dca1eec3c9b8db1d31313a83c8a19d2f9b9a88d1eeaf"}, "https://zenodo.org/record/6615455/files/PASS.6.tar?download=1": {"num_bytes": 18728970240, "checksum": "adabc1ab18be4587a4c1882633015845681e9d8551075406e8d83a025819eef6"}, "https://zenodo.org/record/6615455/files/PASS.7.tar?download=1": {"num_bytes": 18703380480, "checksum": "8661044c3a46f1e2f5c2fa96b9deb48c6b2d218ac9753b139073186920c15ab7"}, "https://zenodo.org/record/6615455/files/PASS.8.tar?download=1": {"num_bytes": 18695772160, "checksum": "0dba1ac815c001d9a83787321e1929b03cebcccc1a8f12725eec866e80480a1f"}, "https://zenodo.org/record/6615455/files/PASS.9.tar?download=1": {"num_bytes": 11152302080, "checksum": "9a7ab254638f4bf21267aa0165452572ae52d82aabb451a65e56a2d51a17e982"}}, "download_size": 179640190811, "post_processing_size": null, "dataset_size": 178563446100, "size_in_bytes": 358203636911}} \ No newline at end of file diff --git a/datasets/pass/dummy/1.0.0/dummy_data.zip b/datasets/pass/dummy/2.0.0/dummy_data.zip similarity index 100% rename from datasets/pass/dummy/1.0.0/dummy_data.zip rename to datasets/pass/dummy/2.0.0/dummy_data.zip diff --git a/datasets/pass/pass.py b/datasets/pass/pass.py index 17a37bd24cc..c72327aff7c 100644 --- a/datasets/pass/pass.py +++ b/datasets/pass/pass.py @@ -41,15 +41,17 @@ _LICENSE = "Creative Commons Attribution 4.0 International" -_IMAGE_ARCHIVE_DOWNLOAD_URL_TEMPLATE = "https://zenodo.org/record/5570664/files/PASS.{idx}.tar?download=1" +_IMAGE_ARCHIVE_DOWNLOAD_URL_TEMPLATE = "https://zenodo.org/record/6615455/files/PASS.{idx}.tar?download=1" -_METADATA_DOWNLOAD_URL = "https://zenodo.org/record/5570664/files/pass_metadata.csv?download=1" +_METADATA_DOWNLOAD_URL = "https://zenodo.org/record/6615455/files/pass_metadata.csv?download=1" class PASS(datasets.GeneratorBasedBuilder): """PASS dataset.""" - VERSION = datasets.Version("1.0.0") + # 1.0.0 - v2 from https://github.com/yukimasano/PASS/blob/6226b456d23efa56b44e79648a9913e086d57335/version_history.txt + # 2.0.0 - v3 from https://github.com/yukimasano/PASS/blob/6226b456d23efa56b44e79648a9913e086d57335/version_history.txt + VERSION = datasets.Version("2.0.0") def _info(self): return datasets.DatasetInfo(