|
1 | | -from email.mime import base |
2 | 1 | import os |
| 2 | +from email.mime import base |
3 | 3 | from functools import partial |
4 | 4 | from pathlib import Path, PurePath |
5 | 5 | from typing import Callable, Dict, List, Optional, Set, Tuple, Union |
@@ -103,14 +103,8 @@ def _is_inside_unrequested_special_dir_to_ignore(matched_rel_path: str, pattern: |
103 | 103 | # We just need to check that every special directory from the path is present explicitly in the pattern. |
104 | 104 | # Since we assume that the path matches the pattern, it's equivalent to checking that both |
105 | 105 | # the path and the pattern have the same number of special directories. |
106 | | - data_dirs_to_ignore_in_path = [ |
107 | | - part for part in PurePath(matched_rel_path).parts |
108 | | - if part in SPECIAL_DIRS_TO_IGNORE |
109 | | - ] |
110 | | - data_dirs_to_ignore_in_pattern = [ |
111 | | - part for part in PurePath(pattern).parts |
112 | | - if part in SPECIAL_DIRS_TO_IGNORE |
113 | | - ] |
| 106 | + data_dirs_to_ignore_in_path = [part for part in PurePath(matched_rel_path).parts if part in SPECIAL_DIRS_TO_IGNORE] |
| 107 | + data_dirs_to_ignore_in_pattern = [part for part in PurePath(pattern).parts if part in SPECIAL_DIRS_TO_IGNORE] |
114 | 108 | return len(data_dirs_to_ignore_in_path) != len(data_dirs_to_ignore_in_pattern) |
115 | 109 |
|
116 | 110 |
|
@@ -168,12 +162,10 @@ def _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(matched_rel_ |
168 | 162 | # Since we assume that the path matches the pattern, it's equivalent to checking that both |
169 | 163 | # the path and the pattern have the same number of hidden parts. |
170 | 164 | hidden_directories_in_path = [ |
171 | | - part for part in PurePath(matched_rel_path).parts |
172 | | - if part.startswith(".") and not set(part) == {"."} |
| 165 | + part for part in PurePath(matched_rel_path).parts if part.startswith(".") and not set(part) == {"."} |
173 | 166 | ] |
174 | 167 | hidden_directories_in_pattern = [ |
175 | | - part for part in PurePath(pattern).parts |
176 | | - if part.startswith(".") and not set(part) == {"."} |
| 168 | + part for part in PurePath(pattern).parts if part.startswith(".") and not set(part) == {"."} |
177 | 169 | ] |
178 | 170 | return len(hidden_directories_in_path) != len(hidden_directories_in_pattern) |
179 | 171 |
|
@@ -219,19 +211,19 @@ def _resolve_single_pattern_locally( |
219 | 211 | If a URL is passed, it is returned as is. |
220 | 212 | """ |
221 | 213 | if is_relative_path(pattern): |
222 | | - abs_pattern = os.path.join(base_path, pattern) |
223 | | - effective_base_path = base_path |
| 214 | + pattern = os.path.join(base_path, pattern) |
224 | 215 | else: |
225 | | - abs_pattern = pattern |
226 | | - effective_base_path = "/" |
| 216 | + base_path = "/" |
227 | 217 | fs = LocalFileSystem() |
228 | | - glob_iter = [PurePath(filepath) for filepath in fs.glob(abs_pattern) if fs.isfile(filepath)] |
| 218 | + glob_iter = [PurePath(filepath) for filepath in fs.glob(pattern) if fs.isfile(filepath)] |
229 | 219 | matched_paths = [ |
230 | 220 | Path(filepath).resolve() |
231 | 221 | for filepath in glob_iter |
232 | 222 | if (filepath.name not in FILES_TO_IGNORE or PurePath(pattern).name == filepath.name) |
233 | | - and not _is_inside_unrequested_special_dir_to_ignore(os.path.relpath(filepath, effective_base_path), pattern) |
234 | | - and not _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(os.path.relpath(filepath, effective_base_path), pattern) |
| 223 | + and not _is_inside_unrequested_special_dir_to_ignore(os.path.relpath(filepath, base_path), pattern) |
| 224 | + and not _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir( |
| 225 | + os.path.relpath(filepath, base_path), pattern |
| 226 | + ) |
235 | 227 | ] # ignore .ipynb and __pycache__, but keep /../ |
236 | 228 | if allowed_extensions is not None: |
237 | 229 | out = [ |
@@ -419,7 +411,9 @@ def _resolve_single_pattern_in_dataset_repository( |
419 | 411 | for filepath in glob_iter |
420 | 412 | if (filepath.name not in FILES_TO_IGNORE or PurePath(pattern).name == filepath.name) |
421 | 413 | and not _is_inside_unrequested_special_dir_to_ignore(os.path.relpath(filepath, base_path), pattern) |
422 | | - and not _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(os.path.relpath(filepath, base_path), pattern) |
| 414 | + and not _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir( |
| 415 | + os.path.relpath(filepath, base_path), pattern |
| 416 | + ) |
423 | 417 | ] # ignore .ipynb and __pycache__, but keep /../ |
424 | 418 | if allowed_extensions is not None: |
425 | 419 | out = [ |
|
0 commit comments