From 79a7e8a603e6666732954755553dadbe54370e31 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 30 Aug 2022 11:45:18 +0200 Subject: [PATCH 1/2] Support streaming swda dataset --- datasets/swda/swda.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/datasets/swda/swda.py b/datasets/swda/swda.py index 4ad179b55e3..9c0b573e732 100644 --- a/datasets/swda/swda.py +++ b/datasets/swda/swda.py @@ -435,10 +435,8 @@ def _split_generators(self, dl_manager): dl_dir = dl_manager.download_and_extract(_URL) # Use swda/ folder. data_dir = os.path.join(dl_dir, "swda") - # Handle partitions files. - urls_to_download = self._URLS - # Download extract and return paths of split files. - downloaded_files = dl_manager.download_and_extract(urls_to_download) + # Handle partitions files: download extract and return paths of split files. + downloaded_files = dl_manager.download(self._URLS) return [ # Return whole data path and train splits file downloaded path. @@ -476,7 +474,8 @@ def _generate_examples(self, data_dir, split_file): """ # Read in the split file. - split_file = io.open(file=split_file, mode="r", encoding="utf-8").read().splitlines() + with open(file=split_file, mode="r", encoding="utf-8") as f: + split_file = f.read().splitlines() # Read in corpus data using split files. corpus = CorpusReader(src_dirname=data_dir, split_file=split_file) # Generate examples. From d2b63f6e399cb5d25c3928dfa5fa1b50a24c7eee Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 30 Aug 2022 12:41:53 +0200 Subject: [PATCH 2/2] Remove unused import --- datasets/swda/swda.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datasets/swda/swda.py b/datasets/swda/swda.py index 9c0b573e732..7323f871832 100644 --- a/datasets/swda/swda.py +++ b/datasets/swda/swda.py @@ -26,7 +26,6 @@ import csv import datetime import glob -import io import os import re