Skip to content

Commit 7893856

Browse files
committed
add space separator
1 parent 159649a commit 7893856

File tree

2 files changed

+21 -16 lines changed

2 files changed

+21 -16 lines changed

src/datasets/data_files.py

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -27,28 +27,28 @@ class Url(str):
27 27
SPLIT_PATTERN_SHARDED = "data/{split}-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*"
28 28

29 29
DEFAULT_PATTERNS_SPLIT_IN_FILENAME = {
30-
str(Split.TRAIN): ["**[-._/]train[-._]*", "train[-._]*", "**[-._/]training[-._]*", "training[-._]*"],
31-
str(Split.TEST): ["**[-._/]test[-._]*", "test[-._]*", "**[-._/]eval[-._]*", "eval[-._]*"],
30+
str(Split.TRAIN): ["**[-._ /]train[-._ ]*", "train[-._ ]*", "**[-._ /]training[-._ ]*", "training[-._ ]*"],
31+
str(Split.TEST): ["**[-._ /]test[-._ ]*", "test[-._ ]*", "**[-._ /]eval[-._ ]*", "eval[-._ ]*"],
32 32
str(Split.VALIDATION): [
33-
"**[-._/]dev[-._]*",
34-
"dev[-._]*",
35-
"**[-._/]valid[-._]*",
36-
"valid[-._]*",
37-
"**[-._/]validation[-._]*",
38-
"validation[-._]*",
33+
"**[-._ /]dev[-._ ]*",
34+
"dev[-._ ]*",
35+
"**[-._ /]valid[-._ ]*",
36+
"valid[-._ ]*",
37+
"**[-._ /]validation[-._ ]*",
38+
"validation[-._ ]*",
39 39
],
40 40
}
41 41

42 42
DEFAULT_PATTERNS_SPLIT_IN_DIR_NAME = {
43-
str(Split.TRAIN): ["train[-._/]**", "**[-._/]train[-._/]**", "training[-._/]**", "**[-._/]training[-._/]**"],
44-
str(Split.TEST): ["test[-._/]**", "**[-._/]test[-._/]**", "eval[-._/]**", "**[-._/]eval[-._/]**"],
43+
str(Split.TRAIN): ["train[-._ /]**", "**[-._ /]train[-._ /]**", "training[-._ /]**", "**[-._ /]training[-._ /]**"],
44+
str(Split.TEST): ["test[-._ /]**", "**[-._ /]test[-._ /]**", "eval[-._ /]**", "**[-._ /]eval[-._ /]**"],
45 45
str(Split.VALIDATION): [
46-
"dev[-._/]**",
47-
"**[-._/]dev[-._/]**",
48-
"valid[-._/]**",
49-
"**[-._/]valid[-._/]**",
50-
"validation[-._/]**",
51-
"**[-._/]validation[-._/]**",
46+
"dev[-._ /]**",
47+
"**[-._ /]dev[-._ /]**",
48+
"valid[-._ /]**",
49+
"**[-._ /]valid[-._ /]**",
50+
"validation[-._ /]**",
51+
"**[-._ /]validation[-._ /]**",
52 52
],
53 53
}
54 54

tests/test_data_files.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -609,6 +609,11 @@ def _open(
609 609
# With "dev" or "eval" without separators
610 610
{"train": "developers_list.txt"},
611 611
{"train": "data/seqeval_results.txt"},
612+
# With supported separators
613+
{"test": "my.test.file.txt"},
614+
{"test": "my-test-file.txt"},
615+
{"test": "my_test_file.txt"},
616+
{"test": "my test file.txt"},
612 617
],
613 618
)
614 619
def test_get_data_files_patterns(data_file_per_split):

0 commit comments

Comments
 (0)