Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datasets/ubuntu_dialogs_corpus/dataset_infos.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"train": {"description": "Ubuntu Dialogue Corpus, a dataset containing almost 1 million multi-turn dialogues, with a total of over 7 million utterances and 100 million words. This provides a unique resource for research into building dialogue managers based on neural language models that can make use of large amounts of unlabeled data. The dataset has both the multi-turn property of conversations in the Dialog State Tracking Challenge datasets, and the unstructured nature of interactions from microblog services such as Twitter.\n", "citation": "@article{DBLP:journals/corr/LowePSP15,\n author = {Ryan Lowe and\n Nissan Pow and\n Iulian Serban and\n Joelle Pineau},\n title = {The Ubuntu Dialogue Corpus: {A} Large Dataset for Research in Unstructured\n Multi-Turn Dialogue Systems},\n journal = {CoRR},\n volume = {abs/1506.08909},\n year = {2015},\n url = {http://arxiv.org/abs/1506.08909},\n archivePrefix = {arXiv},\n eprint = {1506.08909},\n timestamp = {Mon, 13 Aug 2018 16:48:23 +0200},\n biburl = {https://dblp.org/rec/journals/corr/LowePSP15.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n", "homepage": "https://github.com/rkadlec/ubuntu-ranking-dataset-creator", "license": "", "features": {"Context": {"dtype": "string", "id": null, "_type": "Value"}, "Utterance": {"dtype": "string", "id": null, "_type": "Value"}, "Label": {"dtype": "int32", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "ubuntu_dialogs_corpus", "config_name": "train", "version": {"version_str": "2.0.0", "description": null, "datasets_version_to_prepare": null, "major": 2, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 65497027, "num_examples": 127422, "dataset_name": "ubuntu_dialogs_corpus"}}, "download_checksums": {}, "download_size": 0, "dataset_size": 65497027, "size_in_bytes": 65497027}}
{"train": {"description": "Ubuntu Dialogue Corpus, a dataset containing almost 1 million multi-turn dialogues, with a total of over 7 million utterances and 100 million words. This provides a unique resource for research into building dialogue managers based on neural language models that can make use of large amounts of unlabeled data. The dataset has both the multi-turn property of conversations in the Dialog State Tracking Challenge datasets, and the unstructured nature of interactions from microblog services such as Twitter.\n", "citation": "@article{DBLP:journals/corr/LowePSP15,\n author = {Ryan Lowe and\n Nissan Pow and\n Iulian Serban and\n Joelle Pineau},\n title = {The Ubuntu Dialogue Corpus: {A} Large Dataset for Research in Unstructured\n Multi-Turn Dialogue Systems},\n journal = {CoRR},\n volume = {abs/1506.08909},\n year = {2015},\n url = {http://arxiv.org/abs/1506.08909},\n archivePrefix = {arXiv},\n eprint = {1506.08909},\n timestamp = {Mon, 13 Aug 2018 16:48:23 +0200},\n biburl = {https://dblp.org/rec/journals/corr/LowePSP15.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n", "homepage": "https://github.com/rkadlec/ubuntu-ranking-dataset-creator", "license": "", "features": {"Context": {"dtype": "string", "id": null, "_type": "Value"}, "Utterance": {"dtype": "string", "id": null, "_type": "Value"}, "Label": {"dtype": "int32", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ubuntu_dialogs_corpus", "config_name": "train", "version": {"version_str": "2.0.0", "description": null, "major": 2, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 525126729, "num_examples": 1000000, "dataset_name": "ubuntu_dialogs_corpus"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 525126729, "size_in_bytes": 525126729}, "dev_test": {"description": "Ubuntu Dialogue Corpus, a dataset containing almost 1 million multi-turn dialogues, with a total of over 7 million utterances and 100 million words. This provides a unique resource for research into building dialogue managers based on neural language models that can make use of large amounts of unlabeled data. The dataset has both the multi-turn property of conversations in the Dialog State Tracking Challenge datasets, and the unstructured nature of interactions from microblog services such as Twitter.\n", "citation": "@article{DBLP:journals/corr/LowePSP15,\n author = {Ryan Lowe and\n Nissan Pow and\n Iulian Serban and\n Joelle Pineau},\n title = {The Ubuntu Dialogue Corpus: {A} Large Dataset for Research in Unstructured\n Multi-Turn Dialogue Systems},\n journal = {CoRR},\n volume = {abs/1506.08909},\n year = {2015},\n url = {http://arxiv.org/abs/1506.08909},\n archivePrefix = {arXiv},\n eprint = {1506.08909},\n timestamp = {Mon, 13 Aug 2018 16:48:23 +0200},\n biburl = {https://dblp.org/rec/journals/corr/LowePSP15.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n", "homepage": "https://github.com/rkadlec/ubuntu-ranking-dataset-creator", "license": "", "features": {"Context": {"dtype": "string", "id": null, "_type": "Value"}, "Ground Truth Utterance": {"dtype": "string", "id": null, "_type": "Value"}, "Distractor_0": {"dtype": "string", "id": null, "_type": "Value"}, "Distractor_1": {"dtype": "string", "id": null, "_type": "Value"}, "Distractor_2": {"dtype": "string", "id": null, "_type": "Value"}, "Distractor_3": {"dtype": "string", "id": null, "_type": "Value"}, "Distractor_4": {"dtype": "string", "id": null, "_type": "Value"}, "Distractor_5": {"dtype": "string", "id": null, "_type": "Value"}, "Distractor_6": {"dtype": "string", "id": null, "_type": "Value"}, "Distractor_7": {"dtype": "string", "id": null, "_type": "Value"}, "Distractor_8": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ubuntu_dialogs_corpus", "config_name": "dev_test", "version": {"version_str": "2.0.0", "description": null, "major": 2, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 27060502, "num_examples": 18920, "dataset_name": "ubuntu_dialogs_corpus"}, "validation": {"name": "validation", "num_bytes": 27663181, "num_examples": 19560, "dataset_name": "ubuntu_dialogs_corpus"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 54723683, "size_in_bytes": 54723683}}