Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions datasets/billsum/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ language_creators:
language:
- en
license:
- unknown
- cc0-1.0
multilinguality:
- monolingual
size_categories:
Expand Down Expand Up @@ -134,7 +134,7 @@ The data fields are the same among all splits.

#### Initial Data Collection and Normalization

[More Information Needed](https://github.com/huggingface/datasets/blob/master/CONTRIBUTING.md#how-to-contribute-to-the-dataset-cards)
The data consists of three parts: US training bills, US test bills and California test bills. The US bills were collected from the [Govinfo](https://github.com/unitedstates/congress) service provided by the United States Government Publishing Office (GPO) under the CC0-1.0 license. The California bills from the 2015-2016 session are available from the legislature’s [website](https://leginfo.legislature.ca.gov/).

#### Who are the source language producers?

Expand Down
3 changes: 3 additions & 0 deletions datasets/billsum/billsum.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@

_URL = "https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx"

_LICENSE = "CC0"

_DOCUMENT = "text"
_SUMMARY = "summary"

Expand All @@ -63,6 +65,7 @@ class Billsum(datasets.GeneratorBasedBuilder):
def _info(self):
return datasets.DatasetInfo(
description=_DESCRIPTION,
license=_LICENSE,
features=datasets.Features(
{
_DOCUMENT: datasets.Value("string"),
Expand Down
2 changes: 1 addition & 1 deletion datasets/billsum/dataset_infos.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"default": {"description": "\nBillSum, summarization of US Congressional and California state bills.\n\nThere are several features:\n - text: bill text.\n - summary: summary of the bills.\n - title: title of the bills.\nfeatures for us bills. ca bills does not have.\n - text_len: number of chars in text.\n - sum_len: number of chars in summary.\n", "citation": "\n@misc{kornilova2019billsum,\n title={BillSum: A Corpus for Automatic Summarization of US Legislation},\n author={Anastassia Kornilova and Vlad Eidelman},\n year={2019},\n eprint={1910.00523},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n", "homepage": "https://github.com/FiscalNote/BillSum", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "summary": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": {"input": "text", "output": "summary"}, "builder_name": "billsum", "config_name": "default", "version": {"version_str": "3.0.0", "description": null, "datasets_version_to_prepare": null, "major": 3, "minor": 0, "patch": 0}, "splits": {"ca_test": {"name": "ca_test", "num_bytes": 14945923, "num_examples": 1237, "dataset_name": "billsum"}, "test": {"name": "test", "num_bytes": 37867905, "num_examples": 3269, "dataset_name": "billsum"}, "train": {"name": "train", "num_bytes": 219605578, "num_examples": 18949, "dataset_name": "billsum"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx": {"num_bytes": 67260676, "checksum": "5a55dfb231618d63b25cec4773280a2986d38f53d6d4d39b8256b278edf1110c"}}, "download_size": 67260676, "dataset_size": 272419406, "size_in_bytes": 339680082}}
{"default": {"description": "\nBillSum, summarization of US Congressional and California state bills.\n\nThere are several features:\n - text: bill text.\n - summary: summary of the bills.\n - title: title of the bills.\nfeatures for us bills. ca bills does not have.\n - text_len: number of chars in text.\n - sum_len: number of chars in summary.\n", "citation": "\n@misc{kornilova2019billsum,\n title={BillSum: A Corpus for Automatic Summarization of US Legislation},\n author={Anastassia Kornilova and Vlad Eidelman},\n year={2019},\n eprint={1910.00523},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n", "homepage": "https://github.com/FiscalNote/BillSum", "license": "CC0", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "summary": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "text", "output": "summary"}, "task_templates": null, "builder_name": "billsum", "config_name": "default", "version": {"version_str": "3.0.0", "description": null, "major": 3, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 219596090, "num_examples": 18949, "dataset_name": "billsum"}, "test": {"name": "test", "num_bytes": 37866257, "num_examples": 3269, "dataset_name": "billsum"}, "ca_test": {"name": "ca_test", "num_bytes": 14945291, "num_examples": 1237, "dataset_name": "billsum"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx": {"num_bytes": 67260676, "checksum": "5a55dfb231618d63b25cec4773280a2986d38f53d6d4d39b8256b278edf1110c"}}, "download_size": 67260676, "post_processing_size": null, "dataset_size": 272407638, "size_in_bytes": 339668314}}