Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion datasets/billsum/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ language_creators:
language:
- en
license:
- unknown
- cc0-1.0
multilinguality:
- monolingual
size_categories:
Expand Down Expand Up @@ -132,6 +132,8 @@ The data fields are the same among all splits.

### Source Data

The data consists of three parts: US training bills, US test bills and California test bills. The US bills were collected from the [Govinfo](https://github.com/unitedstates/congress) service provided by the United States Government Publishing Office (GPO) under CC0-1.0 license. The California, bills from the 2015-2016 session were are from the legislature’s [website](https://leginfo.legislature.ca.gov/).

#### Initial Data Collection and Normalization

[More Information Needed](https://github.com/huggingface/datasets/blob/master/CONTRIBUTING.md#how-to-contribute-to-the-dataset-cards)
Expand Down
3 changes: 3 additions & 0 deletions datasets/billsum/billsum.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@

_URL = "https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx"

_LICENSE = "CC0"

_DOCUMENT = "text"
_SUMMARY = "summary"

Expand All @@ -63,6 +65,7 @@ class Billsum(datasets.GeneratorBasedBuilder):
def _info(self):
return datasets.DatasetInfo(
description=_DESCRIPTION,
license=_LICENSE,
features=datasets.Features(
{
_DOCUMENT: datasets.Value("string"),
Expand Down
2 changes: 1 addition & 1 deletion datasets/billsum/dataset_infos.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"default": {"description": "\nBillSum, summarization of US Congressional and California state bills.\n\nThere are several features:\n - text: bill text.\n - summary: summary of the bills.\n - title: title of the bills.\nfeatures for us bills. ca bills does not have.\n - text_len: number of chars in text.\n - sum_len: number of chars in summary.\n", "citation": "\n@misc{kornilova2019billsum,\n title={BillSum: A Corpus for Automatic Summarization of US Legislation},\n author={Anastassia Kornilova and Vlad Eidelman},\n year={2019},\n eprint={1910.00523},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n", "homepage": "https://github.com/FiscalNote/BillSum", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "summary": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": {"input": "text", "output": "summary"}, "builder_name": "billsum", "config_name": "default", "version": {"version_str": "3.0.0", "description": null, "datasets_version_to_prepare": null, "major": 3, "minor": 0, "patch": 0}, "splits": {"ca_test": {"name": "ca_test", "num_bytes": 14945923, "num_examples": 1237, "dataset_name": "billsum"}, "test": {"name": "test", "num_bytes": 37867905, "num_examples": 3269, "dataset_name": "billsum"}, "train": {"name": "train", "num_bytes": 219605578, "num_examples": 18949, "dataset_name": "billsum"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx": {"num_bytes": 67260676, "checksum": "5a55dfb231618d63b25cec4773280a2986d38f53d6d4d39b8256b278edf1110c"}}, "download_size": 67260676, "dataset_size": 272419406, "size_in_bytes": 339680082}}
{"default": {"description": "\nBillSum, summarization of US Congressional and California state bills.\n\nThere are several features:\n - text: bill text.\n - summary: summary of the bills.\n - title: title of the bills.\nfeatures for us bills. ca bills does not have.\n - text_len: number of chars in text.\n - sum_len: number of chars in summary.\n", "citation": "\n@misc{kornilova2019billsum,\n title={BillSum: A Corpus for Automatic Summarization of US Legislation},\n author={Anastassia Kornilova and Vlad Eidelman},\n year={2019},\n eprint={1910.00523},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n", "homepage": "https://github.com/FiscalNote/BillSum", "license": "CC0", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "summary": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "text", "output": "summary"}, "task_templates": null, "builder_name": "billsum", "config_name": "default", "version": {"version_str": "3.0.0", "description": null, "major": 3, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 219596090, "num_examples": 18949, "dataset_name": "billsum"}, "test": {"name": "test", "num_bytes": 37866257, "num_examples": 3269, "dataset_name": "billsum"}, "ca_test": {"name": "ca_test", "num_bytes": 14945291, "num_examples": 1237, "dataset_name": "billsum"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx": {"num_bytes": 67260676, "checksum": "5a55dfb231618d63b25cec4773280a2986d38f53d6d4d39b8256b278edf1110c"}}, "download_size": 67260676, "post_processing_size": null, "dataset_size": 272407638, "size_in_bytes": 339668314}}