Skip to content

Commit 234b45f

Browse files
committed
simplify
1 parent c7dd8e5 commit 234b45f

File tree

1 file changed

+15
-21
lines changed

1 file changed

+15
-21
lines changed

src/datasets/data_files.py

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from email.mime import base
21
import os
2+
from email.mime import base
33
from functools import partial
44
from pathlib import Path, PurePath
55
from typing import Callable, Dict, List, Optional, Set, Tuple, Union
@@ -103,14 +103,8 @@ def _is_inside_unrequested_special_dir_to_ignore(matched_rel_path: str, pattern:
103103
# We just need to check that every special directory from the path is present explicitly in the pattern.
104104
# Since we assume that the path matches the pattern, it's equivalent to checking that both
105105
# the path and the pattern have the same number of special directories.
106-
data_dirs_to_ignore_in_path = [
107-
part for part in PurePath(matched_rel_path).parts
108-
if part in SPECIAL_DIRS_TO_IGNORE
109-
]
110-
data_dirs_to_ignore_in_pattern = [
111-
part for part in PurePath(pattern).parts
112-
if part in SPECIAL_DIRS_TO_IGNORE
113-
]
106+
data_dirs_to_ignore_in_path = [part for part in PurePath(matched_rel_path).parts if part in SPECIAL_DIRS_TO_IGNORE]
107+
data_dirs_to_ignore_in_pattern = [part for part in PurePath(pattern).parts if part in SPECIAL_DIRS_TO_IGNORE]
114108
return len(data_dirs_to_ignore_in_path) != len(data_dirs_to_ignore_in_pattern)
115109

116110

@@ -168,12 +162,10 @@ def _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(matched_rel_
168162
# Since we assume that the path matches the pattern, it's equivalent to checking that both
169163
# the path and the pattern have the same number of hidden parts.
170164
hidden_directories_in_path = [
171-
part for part in PurePath(matched_rel_path).parts
172-
if part.startswith(".") and not set(part) == {"."}
165+
part for part in PurePath(matched_rel_path).parts if part.startswith(".") and not set(part) == {"."}
173166
]
174167
hidden_directories_in_pattern = [
175-
part for part in PurePath(pattern).parts
176-
if part.startswith(".") and not set(part) == {"."}
168+
part for part in PurePath(pattern).parts if part.startswith(".") and not set(part) == {"."}
177169
]
178170
return len(hidden_directories_in_path) != len(hidden_directories_in_pattern)
179171

@@ -219,19 +211,19 @@ def _resolve_single_pattern_locally(
219211
If a URL is passed, it is returned as is.
220212
"""
221213
if is_relative_path(pattern):
222-
abs_pattern = os.path.join(base_path, pattern)
223-
effective_base_path = base_path
214+
pattern = os.path.join(base_path, pattern)
224215
else:
225-
abs_pattern = pattern
226-
effective_base_path = "/"
216+
base_path = "/"
227217
fs = LocalFileSystem()
228-
glob_iter = [PurePath(filepath) for filepath in fs.glob(abs_pattern) if fs.isfile(filepath)]
218+
glob_iter = [PurePath(filepath) for filepath in fs.glob(pattern) if fs.isfile(filepath)]
229219
matched_paths = [
230220
Path(filepath).resolve()
231221
for filepath in glob_iter
232222
if (filepath.name not in FILES_TO_IGNORE or PurePath(pattern).name == filepath.name)
233-
and not _is_inside_unrequested_special_dir_to_ignore(os.path.relpath(filepath, effective_base_path), pattern)
234-
and not _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(os.path.relpath(filepath, effective_base_path), pattern)
223+
and not _is_inside_unrequested_special_dir_to_ignore(os.path.relpath(filepath, base_path), pattern)
224+
and not _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(
225+
os.path.relpath(filepath, base_path), pattern
226+
)
235227
] # ignore .ipynb and __pycache__, but keep /../
236228
if allowed_extensions is not None:
237229
out = [
@@ -419,7 +411,9 @@ def _resolve_single_pattern_in_dataset_repository(
419411
for filepath in glob_iter
420412
if (filepath.name not in FILES_TO_IGNORE or PurePath(pattern).name == filepath.name)
421413
and not _is_inside_unrequested_special_dir_to_ignore(os.path.relpath(filepath, base_path), pattern)
422-
and not _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(os.path.relpath(filepath, base_path), pattern)
414+
and not _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(
415+
os.path.relpath(filepath, base_path), pattern
416+
)
423417
] # ignore .ipynb and __pycache__, but keep /../
424418
if allowed_extensions is not None:
425419
out = [

0 commit comments

Comments
 (0)