@@ -491,7 +491,7 @@ def _load_importable_file(
491491
492492def infer_module_for_data_files_list (
493493 data_files_list : DataFilesList , download_config : Optional [DownloadConfig ] = None
494- ) -> Optional [ Tuple [str , str ] ]:
494+ ) -> Tuple [Optional [ str ], dict ]:
495495 """Infer module (and builder kwargs) from list of data files.
496496
497497 It picks the module based on the most common file extension.
@@ -507,18 +507,18 @@ def infer_module_for_data_files_list(
507507 - dict of builder kwargs
508508 """
509509 extensions_counter = Counter (
510- "." + suffix .lower ()
510+ ( "." + suffix .lower (), xbasename ( filepath ) in ( "metadata.jsonl" , "metadata.csv" ) )
511511 for filepath in data_files_list [: config .DATA_FILES_MAX_NUMBER_FOR_MODULE_INFERENCE ]
512512 for suffix in xbasename (filepath ).split ("." )[1 :]
513513 )
514514 if extensions_counter :
515515
516- def sort_key (ext_count : Tuple [str , int ]) -> Tuple [int , bool ]:
517- """Sort by count and set ".parquet" as the favorite in case of a draw"""
518- ext , count = ext_count
519- return (count , ext == ".parquet" , ext )
516+ def sort_key (ext_count : Tuple [Tuple [ str , bool ] , int ]) -> Tuple [int , bool ]:
517+ """Sort by count and set ".parquet" as the favorite in case of a draw, and ignore metadata files """
518+ ( ext , is_metadata ) , count = ext_count
519+ return (not is_metadata , count , ext == ".parquet" , ext )
520520
521- for ext , _ in sorted (extensions_counter .items (), key = sort_key , reverse = True ):
521+ for ( ext , _ ) , _ in sorted (extensions_counter .items (), key = sort_key , reverse = True ):
522522 if ext in _EXTENSION_TO_MODULE :
523523 return _EXTENSION_TO_MODULE [ext ]
524524 elif ext == ".zip" :
@@ -528,7 +528,7 @@ def sort_key(ext_count: Tuple[str, int]) -> Tuple[int, bool]:
528528
529529def infer_module_for_data_files_list_in_archives (
530530 data_files_list : DataFilesList , download_config : Optional [DownloadConfig ] = None
531- ) -> Optional [ Tuple [str , str ] ]:
531+ ) -> Tuple [Optional [ str ], dict ]:
532532 """Infer module (and builder kwargs) from list of archive data files.
533533
534534 Args:
0 commit comments