Skip to content

Commit 97bce1c

Browse files
committed
Replace download_and_extract with plain download in dataset scripts [A-D] where there is no extraction
1 parent a3df1e4 commit 97bce1c

44 files changed

Lines changed: 61 additions & 62 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

datasets/acronym_identification/acronym_identification.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -59,9 +59,9 @@ def _info(self):
5959
)
6060

6161
def _split_generators(self, dl_manager):
62-
train_path = dl_manager.download_and_extract(_TRAIN_URL)
63-
valid_path = dl_manager.download_and_extract(_VALID_URL)
64-
test_path = dl_manager.download_and_extract(_TEST_URL)
62+
train_path = dl_manager.download(_TRAIN_URL)
63+
valid_path = dl_manager.download(_VALID_URL)
64+
test_path = dl_manager.download(_TEST_URL)
6565
return [
6666
datasets.SplitGenerator(
6767
name=datasets.Split.TRAIN,

datasets/ade_corpus_v2/ade_corpus_v2.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -158,9 +158,9 @@ def _info(self):
158158
def _split_generators(self, dl_manager):
159159
"""Returns SplitGenerators."""
160160

161-
DAE_path = dl_manager.download_and_extract(_DOWNLOAD_URL.format("DRUG", "AE", "rel"))
162-
DD_path = dl_manager.download_and_extract(_DOWNLOAD_URL.format("DRUG", "DOSE", "rel"))
163-
DAE_NEG_path = dl_manager.download_and_extract(_DOWNLOAD_URL.format("ADE", "NEG", "txt"))
161+
DAE_path = dl_manager.download(_DOWNLOAD_URL.format("DRUG", "AE", "rel"))
162+
DD_path = dl_manager.download(_DOWNLOAD_URL.format("DRUG", "DOSE", "rel"))
163+
DAE_NEG_path = dl_manager.download(_DOWNLOAD_URL.format("ADE", "NEG", "txt"))
164164

165165
return [
166166
datasets.SplitGenerator(

datasets/ag_news/ag_news.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -69,8 +69,8 @@ def _info(self):
6969
)
7070

7171
def _split_generators(self, dl_manager):
72-
train_path = dl_manager.download_and_extract(_TRAIN_DOWNLOAD_URL)
73-
test_path = dl_manager.download_and_extract(_TEST_DOWNLOAD_URL)
72+
train_path = dl_manager.download(_TRAIN_DOWNLOAD_URL)
73+
test_path = dl_manager.download(_TEST_DOWNLOAD_URL)
7474
return [
7575
datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": train_path}),
7676
datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"filepath": test_path}),

datasets/alt/alt.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ def _split_generators(self, dl_manager):
194194
else:
195195
data_split = {}
196196
for k in _SPLIT:
197-
data_split[k] = dl_manager.download_and_extract(_SPLIT[k])
197+
data_split[k] = dl_manager.download(_SPLIT[k])
198198

199199
return [
200200
datasets.SplitGenerator(

datasets/amazon_reviews_multi/amazon_reviews_multi.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -112,9 +112,9 @@ def _split_generators(self, dl_manager):
112112
dev_urls = [_DOWNLOAD_URL.format(split="dev", lang=lang) for lang in self.config.languages]
113113
test_urls = [_DOWNLOAD_URL.format(split="test", lang=lang) for lang in self.config.languages]
114114

115-
train_paths = dl_manager.download_and_extract(train_urls)
116-
dev_paths = dl_manager.download_and_extract(dev_urls)
117-
test_paths = dl_manager.download_and_extract(test_urls)
115+
train_paths = dl_manager.download(train_urls)
116+
dev_paths = dl_manager.download(dev_urls)
117+
test_paths = dl_manager.download(test_urls)
118118

119119
return [
120120
datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"file_paths": train_paths}),

datasets/amttl/amttl.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@ def _split_generators(self, dl_manager):
107107
"dev": f"{_URL}{_DEV_FILE}",
108108
"test": f"{_URL}{_TEST_FILE}",
109109
}
110-
downloaded_files = dl_manager.download_and_extract(urls_to_download)
110+
downloaded_files = dl_manager.download(urls_to_download)
111111

112112
return [
113113
datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": downloaded_files["train"]}),

datasets/app_reviews/app_reviews.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ def _info(self):
5858
)
5959

6060
def _split_generators(self, dl_manager):
61-
train_path = dl_manager.download_and_extract(_TRAIN_DOWNLOAD_URL)
61+
train_path = dl_manager.download(_TRAIN_DOWNLOAD_URL)
6262
return [
6363
datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": train_path}),
6464
]

datasets/ar_res_reviews/ar_res_reviews.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ def _info(self):
6767
def _split_generators(self, dl_manager):
6868
"""Returns SplitGenerators."""
6969

70-
data_dir = dl_manager.download_and_extract(_DOWNLOAD_URL)
70+
data_dir = dl_manager.download(_DOWNLOAD_URL)
7171
return [
7272
datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": data_dir}),
7373
]

datasets/arabic_pos_dialect/arabic_pos_dialect.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ def _split_generators(self, dl_manager):
102102
# dl_manager is a datasets.download.DownloadManager that can be used to
103103
# download and extract URLs
104104
urls_to_download = {dialect: _URL + "seg_plus_pos_{}.txt".format(dialect) for dialect in _DIALECTS}
105-
dl_dir = dl_manager.download_and_extract(urls_to_download)
105+
dl_dir = dl_manager.download(urls_to_download)
106106
return [
107107
datasets.SplitGenerator(
108108
name=datasets.Split.TRAIN,

datasets/arcd/arcd.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ def _info(self):
8383

8484
def _split_generators(self, dl_manager):
8585
urls_to_download = _URLs
86-
downloaded_files = dl_manager.download_and_extract(urls_to_download)
86+
downloaded_files = dl_manager.download(urls_to_download)
8787

8888
return [
8989
datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": downloaded_files["train"]}),

0 commit comments

Comments
 (0)