diff --git a/datasets/billsum/README.md b/datasets/billsum/README.md index e6472045c40..99f23568133 100644 --- a/datasets/billsum/README.md +++ b/datasets/billsum/README.md @@ -6,7 +6,7 @@ language_creators: language: - en license: -- unknown +- cc0-1.0 multilinguality: - monolingual size_categories: @@ -134,7 +134,7 @@ The data fields are the same among all splits. #### Initial Data Collection and Normalization -[More Information Needed](https://github.com/huggingface/datasets/blob/master/CONTRIBUTING.md#how-to-contribute-to-the-dataset-cards) +The data consists of three parts: US training bills, US test bills and California test bills. The US bills were collected from the [Govinfo](https://github.com/unitedstates/congress) service provided by the United States Government Publishing Office (GPO) under CC0-1.0 license. The California, bills from the 2015-2016 session are available from the legislature’s [website](https://leginfo.legislature.ca.gov/). #### Who are the source language producers? diff --git a/datasets/billsum/billsum.py b/datasets/billsum/billsum.py index 4e7ca20475d..4285236bb06 100644 --- a/datasets/billsum/billsum.py +++ b/datasets/billsum/billsum.py @@ -48,6 +48,8 @@ _URL = "https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx" +_LICENSE = "CC0" + _DOCUMENT = "text" _SUMMARY = "summary" @@ -63,6 +65,7 @@ class Billsum(datasets.GeneratorBasedBuilder): def _info(self): return datasets.DatasetInfo( description=_DESCRIPTION, + license=_LICENSE, features=datasets.Features( { _DOCUMENT: datasets.Value("string"), diff --git a/datasets/billsum/dataset_infos.json b/datasets/billsum/dataset_infos.json index 703aba30584..05919a0cfd6 100644 --- a/datasets/billsum/dataset_infos.json +++ b/datasets/billsum/dataset_infos.json @@ -1 +1 @@ -{"default": {"description": "\nBillSum, summarization of US Congressional and California state bills.\n\nThere are several features:\n - text: bill text.\n - summary: summary of the bills.\n - title: title of the bills.\nfeatures for us bills. ca bills does not have.\n - text_len: number of chars in text.\n - sum_len: number of chars in summary.\n", "citation": "\n@misc{kornilova2019billsum,\n title={BillSum: A Corpus for Automatic Summarization of US Legislation},\n author={Anastassia Kornilova and Vlad Eidelman},\n year={2019},\n eprint={1910.00523},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n", "homepage": "https://github.com/FiscalNote/BillSum", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "summary": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": {"input": "text", "output": "summary"}, "builder_name": "billsum", "config_name": "default", "version": {"version_str": "3.0.0", "description": null, "datasets_version_to_prepare": null, "major": 3, "minor": 0, "patch": 0}, "splits": {"ca_test": {"name": "ca_test", "num_bytes": 14945923, "num_examples": 1237, "dataset_name": "billsum"}, "test": {"name": "test", "num_bytes": 37867905, "num_examples": 3269, "dataset_name": "billsum"}, "train": {"name": "train", "num_bytes": 219605578, "num_examples": 18949, "dataset_name": "billsum"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx": {"num_bytes": 67260676, "checksum": "5a55dfb231618d63b25cec4773280a2986d38f53d6d4d39b8256b278edf1110c"}}, "download_size": 67260676, "dataset_size": 272419406, "size_in_bytes": 339680082}} +{"default": {"description": "\nBillSum, summarization of US Congressional and California state bills.\n\nThere are several features:\n - text: bill text.\n - summary: summary of the bills.\n - title: title of the bills.\nfeatures for us bills. ca bills does not have.\n - text_len: number of chars in text.\n - sum_len: number of chars in summary.\n", "citation": "\n@misc{kornilova2019billsum,\n title={BillSum: A Corpus for Automatic Summarization of US Legislation},\n author={Anastassia Kornilova and Vlad Eidelman},\n year={2019},\n eprint={1910.00523},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n", "homepage": "https://github.com/FiscalNote/BillSum", "license": "CC0", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "summary": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "text", "output": "summary"}, "task_templates": null, "builder_name": "billsum", "config_name": "default", "version": {"version_str": "3.0.0", "description": null, "major": 3, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 219596090, "num_examples": 18949, "dataset_name": "billsum"}, "test": {"name": "test", "num_bytes": 37866257, "num_examples": 3269, "dataset_name": "billsum"}, "ca_test": {"name": "ca_test", "num_bytes": 14945291, "num_examples": 1237, "dataset_name": "billsum"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx": {"num_bytes": 67260676, "checksum": "5a55dfb231618d63b25cec4773280a2986d38f53d6d4d39b8256b278edf1110c"}}, "download_size": 67260676, "post_processing_size": null, "dataset_size": 272407638, "size_in_bytes": 339668314}} \ No newline at end of file