Skip to content

Commit da29ac3

Browse files
authored
Don't expand_info in HF glob (#6469)
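Pass `expand_info=False` to `fs.glob` for the `hf` protocol (when the installed huggingface_hub version is >= 0.20.0), so the glob skips costly file info such as lastCommit.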
1 parent 30f6a2d commit da29ac3

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

src/datasets/data_files.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -347,9 +347,13 @@ def resolve_pattern(
347347
files_to_ignore = set(FILES_TO_IGNORE) - {xbasename(pattern)}
348348
protocol = fs.protocol if isinstance(fs.protocol, str) else fs.protocol[0]
349349
protocol_prefix = protocol + "://" if protocol != "file" else ""
350+
glob_kwargs = {}
351+
if protocol == "hf" and config.HF_HUB_VERSION >= version.parse("0.20.0"):
352+
# 10 times faster glob with detail=True (ignores costly info like lastCommit)
353+
glob_kwargs["expand_info"] = False
350354
matched_paths = [
351355
filepath if filepath.startswith(protocol_prefix) else protocol_prefix + filepath
352-
for filepath, info in fs.glob(pattern, detail=True).items()
356+
for filepath, info in fs.glob(pattern, detail=True, **glob_kwargs).items()
353357
if info["type"] == "file"
354358
and (xbasename(filepath) not in files_to_ignore)
355359
and not _is_inside_unrequested_special_dir(

0 commit comments

Comments
 (0)