Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datasets/web_of_science/dataset_infos.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"WOS5736": {"description": "Copyright (c) 2017 Kamran Kowsari\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this dataset and associated documentation files (the \"Dataset\"), to deal\nin the dataset without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Dataset, and to permit persons to whom the dataset is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Dataset.\n\nIf you use this dataset please cite: Referenced paper: HDLTex: Hierarchical Deep Learning for Text Classification\n\nDescription of Dataset:\n\nHere is three datasets which include WOS-11967 , WOS-46985, and WOS-5736\nEach folder contains:\n-X.txt\n-Y.txt\n-YL1.txt\n-YL2.txt\n\nX is input data that include text sequences\nY is target value\nYL1 is target value of level one (parent label)\nYL2 is target value of level one (child label)\nWeb of Science Dataset WOS-5736\n -This dataset contains 5,736 documents with 11 categories which include 3 parents categories.", "citation": "@inproceedings{kowsari2017HDLTex,\ntitle={HDLTex: Hierarchical Deep Learning for Text Classification},\nauthor={Kowsari, Kamran and Brown, Donald E and Heidarysafa, Mojtaba and Jafari Meimandi, Kiana and and Gerber, Matthew S and Barnes, Laura E},\nbooktitle={Machine Learning and Applications (ICMLA), 2017 16th IEEE International Conference on},\nyear={2017},\norganization={IEEE}\n}\n", "homepage": "https://data.mendeley.com/datasets/9rw3vkcfy4/6", "license": "", "features": {"input_data": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_1": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_2": {"dtype": "int32", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "web_of_science", "config_name": "WOS5736", "version": {"version_str": "6.0.0", "description": "", "datasets_version_to_prepare": null, "major": 6, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 8055118, "num_examples": 5736, "dataset_name": "web_of_science"}}, "download_checksums": {"https://data.mendeley.com/datasets/9rw3vkcfy4/6/files/c9ea673d-5542-44c0-ab7b-f1311f7d61df/WebOfScience.zip?dl=1": {"num_bytes": 60222421, "checksum": "b787d484bff88b0dcdb3fa291d06ec9d2f025dc2a67ce1045d0c688cd96ccf8a"}}, "download_size": 60222421, "dataset_size": 8055118, "size_in_bytes": 68277539}, "WOS11967": {"description": "Copyright (c) 2017 Kamran Kowsari\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this dataset and associated documentation files (the \"Dataset\"), to deal\nin the dataset without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Dataset, and to permit persons to whom the dataset is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Dataset.\n\nIf you use this dataset please cite: Referenced paper: HDLTex: Hierarchical Deep Learning for Text Classification\n\nDescription of Dataset:\n\nHere is three datasets which include WOS-11967 , WOS-46985, and WOS-5736\nEach folder contains:\n-X.txt\n-Y.txt\n-YL1.txt\n-YL2.txt\n\nX is input data that include text sequences\nY is target value\nYL1 is target value of level one (parent label)\nYL2 is target value of level one (child label)\nWeb of Science Dataset WOS-11967\n -This dataset contains 11,967 documents with 35 categories which include 7 parents categories.", "citation": "@inproceedings{kowsari2017HDLTex,\ntitle={HDLTex: Hierarchical Deep Learning for Text Classification},\nauthor={Kowsari, Kamran and Brown, Donald E and Heidarysafa, Mojtaba and Jafari Meimandi, Kiana and and Gerber, Matthew S and Barnes, Laura E},\nbooktitle={Machine Learning and Applications (ICMLA), 2017 16th IEEE International Conference on},\nyear={2017},\norganization={IEEE}\n}\n", "homepage": "https://data.mendeley.com/datasets/9rw3vkcfy4/6", "license": "", "features": {"input_data": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_1": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_2": {"dtype": "int32", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "web_of_science", "config_name": "WOS11967", "version": {"version_str": "6.0.0", "description": "", "datasets_version_to_prepare": null, "major": 6, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 16255871, "num_examples": 11967, "dataset_name": "web_of_science"}}, "download_checksums": {"https://data.mendeley.com/datasets/9rw3vkcfy4/6/files/c9ea673d-5542-44c0-ab7b-f1311f7d61df/WebOfScience.zip?dl=1": {"num_bytes": 60222421, "checksum": "b787d484bff88b0dcdb3fa291d06ec9d2f025dc2a67ce1045d0c688cd96ccf8a"}}, "download_size": 60222421, "dataset_size": 16255871, "size_in_bytes": 76478292}, "WOS46985": {"description": "Copyright (c) 2017 Kamran Kowsari\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this dataset and associated documentation files (the \"Dataset\"), to deal\nin the dataset without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Dataset, and to permit persons to whom the dataset is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Dataset.\n\nIf you use this dataset please cite: Referenced paper: HDLTex: Hierarchical Deep Learning for Text Classification\n\nDescription of Dataset:\n\nHere is three datasets which include WOS-11967 , WOS-46985, and WOS-5736\nEach folder contains:\n-X.txt\n-Y.txt\n-YL1.txt\n-YL2.txt\n\nX is input data that include text sequences\nY is target value\nYL1 is target value of level one (parent label)\nYL2 is target value of level one (child label)\n\n Web of Science Dataset WOS-46985\n -This dataset contains 46,985 documents with 134 categories which include 7 parents categories.", "citation": "@inproceedings{kowsari2017HDLTex,\ntitle={HDLTex: Hierarchical Deep Learning for Text Classification},\nauthor={Kowsari, Kamran and Brown, Donald E and Heidarysafa, Mojtaba and Jafari Meimandi, Kiana and and Gerber, Matthew S and Barnes, Laura E},\nbooktitle={Machine Learning and Applications (ICMLA), 2017 16th IEEE International Conference on},\nyear={2017},\norganization={IEEE}\n}\n", "homepage": "https://data.mendeley.com/datasets/9rw3vkcfy4/6", "license": "", "features": {"input_data": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_1": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_2": {"dtype": "int32", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "web_of_science", "config_name": "WOS46985", "version": {"version_str": "6.0.0", "description": "", "datasets_version_to_prepare": null, "major": 6, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 65501096, "num_examples": 46985, "dataset_name": "web_of_science"}}, "download_checksums": {"https://data.mendeley.com/datasets/9rw3vkcfy4/6/files/c9ea673d-5542-44c0-ab7b-f1311f7d61df/WebOfScience.zip?dl=1": {"num_bytes": 60222421, "checksum": "b787d484bff88b0dcdb3fa291d06ec9d2f025dc2a67ce1045d0c688cd96ccf8a"}}, "download_size": 60222421, "dataset_size": 65501096, "size_in_bytes": 125723517}}
{"WOS5736": {"description": "The Web Of Science (WOS) dataset is a collection of data of published papers\navailable from the Web of Science. WOS has been released in three versions: WOS-46985, WOS-11967 and WOS-5736. WOS-46985 is the\nfull dataset. WOS-11967 and WOS-5736 are two subsets of WOS-46985.\n\nWeb of Science Dataset WOS-5736: This dataset contains 5,736 documents with 11 categories which include 3 parents categories.", "citation": "@inproceedings{kowsari2017HDLTex,\ntitle={HDLTex: Hierarchical Deep Learning for Text Classification},\nauthor={Kowsari, Kamran and Brown, Donald E and Heidarysafa, Mojtaba and Jafari Meimandi, Kiana and and Gerber, Matthew S and Barnes, Laura E},\nbooktitle={Machine Learning and Applications (ICMLA), 2017 16th IEEE International Conference on},\nyear={2017},\norganization={IEEE}\n}\n", "homepage": "https://data.mendeley.com/datasets/9rw3vkcfy4/6", "license": "", "features": {"input_data": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_1": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_2": {"dtype": "int32", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "builder_name": "web_of_science", "config_name": "WOS5736", "version": {"version_str": "6.0.0", "description": "", "major": 6, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 8051533, "num_examples": 5736, "dataset_name": "web_of_science"}}, "download_checksums": {"https://data.mendeley.com/public-files/datasets/9rw3vkcfy4/files/c9ea673d-5542-44c0-ab7b-f1311f7d61df/file_downloaded": {"num_bytes": 60222421, "checksum": "b787d484bff88b0dcdb3fa291d06ec9d2f025dc2a67ce1045d0c688cd96ccf8a"}}, "download_size": 60222421, "post_processing_size": null, "dataset_size": 8051533, "size_in_bytes": 68273954}, "WOS11967": {"description": "The Web Of Science (WOS) dataset is a collection of data of published papers\navailable from the Web of Science. WOS has been released in three versions: WOS-46985, WOS-11967 and WOS-5736. WOS-46985 is the\nfull dataset. WOS-11967 and WOS-5736 are two subsets of WOS-46985.\n\nWeb of Science Dataset WOS-11967: This dataset contains 11,967 documents with 35 categories which include 7 parents categories.", "citation": "@inproceedings{kowsari2017HDLTex,\ntitle={HDLTex: Hierarchical Deep Learning for Text Classification},\nauthor={Kowsari, Kamran and Brown, Donald E and Heidarysafa, Mojtaba and Jafari Meimandi, Kiana and and Gerber, Matthew S and Barnes, Laura E},\nbooktitle={Machine Learning and Applications (ICMLA), 2017 16th IEEE International Conference on},\nyear={2017},\norganization={IEEE}\n}\n", "homepage": "https://data.mendeley.com/datasets/9rw3vkcfy4/6", "license": "", "features": {"input_data": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_1": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_2": {"dtype": "int32", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "builder_name": "web_of_science", "config_name": "WOS11967", "version": {"version_str": "6.0.0", "description": "", "major": 6, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 16248391, "num_examples": 11967, "dataset_name": "web_of_science"}}, "download_checksums": {"https://data.mendeley.com/public-files/datasets/9rw3vkcfy4/files/c9ea673d-5542-44c0-ab7b-f1311f7d61df/file_downloaded": {"num_bytes": 60222421, "checksum": "b787d484bff88b0dcdb3fa291d06ec9d2f025dc2a67ce1045d0c688cd96ccf8a"}}, "download_size": 60222421, "post_processing_size": null, "dataset_size": 16248391, "size_in_bytes": 76470812}, "WOS46985": {"description": "The Web Of Science (WOS) dataset is a collection of data of published papers\navailable from the Web of Science. WOS has been released in three versions: WOS-46985, WOS-11967 and WOS-5736. WOS-46985 is the\nfull dataset. WOS-11967 and WOS-5736 are two subsets of WOS-46985.\n\nWeb of Science Dataset WOS-46985: This dataset contains 46,985 documents with 134 categories which include 7 parents categories.", "citation": "@inproceedings{kowsari2017HDLTex,\ntitle={HDLTex: Hierarchical Deep Learning for Text Classification},\nauthor={Kowsari, Kamran and Brown, Donald E and Heidarysafa, Mojtaba and Jafari Meimandi, Kiana and and Gerber, Matthew S and Barnes, Laura E},\nbooktitle={Machine Learning and Applications (ICMLA), 2017 16th IEEE International Conference on},\nyear={2017},\norganization={IEEE}\n}\n", "homepage": "https://data.mendeley.com/datasets/9rw3vkcfy4/6", "license": "", "features": {"input_data": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_1": {"dtype": "int32", "id": null, "_type": "Value"}, "label_level_2": {"dtype": "int32", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "builder_name": "web_of_science", "config_name": "WOS46985", "version": {"version_str": "6.0.0", "description": "", "major": 6, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 65471726, "num_examples": 46985, "dataset_name": "web_of_science"}}, "download_checksums": {"https://data.mendeley.com/public-files/datasets/9rw3vkcfy4/files/c9ea673d-5542-44c0-ab7b-f1311f7d61df/file_downloaded": {"num_bytes": 60222421, "checksum": "b787d484bff88b0dcdb3fa291d06ec9d2f025dc2a67ce1045d0c688cd96ccf8a"}}, "download_size": 60222421, "post_processing_size": null, "dataset_size": 65471726, "size_in_bytes": 125694147}}
4 changes: 1 addition & 3 deletions datasets/web_of_science/web_of_science.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@

"""

_DATA_URL = (
"https://data.mendeley.com/datasets/9rw3vkcfy4/6/files/c9ea673d-5542-44c0-ab7b-f1311f7d61df/WebOfScience.zip?dl=1"
)
_DATA_URL = "https://data.mendeley.com/public-files/datasets/9rw3vkcfy4/files/c9ea673d-5542-44c0-ab7b-f1311f7d61df/file_downloaded"


class WebOfScienceConfig(datasets.BuilderConfig):
Expand Down