@@ -592,15 +592,15 @@ def infer_module_for_data_files(
592592 return module_name , default_builder_kwargs
593593
594594
def update_hash_for_cache(hash: str, **kwargs: Union[MetadataConfigs, DatasetInfosDict, DataFilesDict]) -> str:
    """
    Used to update hash of packaged modules which is used for creating unique cache directories to reflect
    different config parameters which are passed in metadata from readme.

    Args:
        hash (`str`): Base hash to extend.
        **kwargs: Objects to mix into the hash, passed by keyword (e.g. `data_files=...`).
            Only the values are fed to the hasher, in the order the keywords were given;
            the keyword names themselves are not hashed.

    Returns:
        `str`: Hex digest of the base hash combined with every keyword value.
    """
    # A `**kwargs: T` annotation describes the type of each individual value,
    # so the union is given directly rather than wrapped in `Dict[str, ...]`.
    m = Hasher()
    m.update(hash)
    # NOTE(review): values are hashed in call order, so callers presumably need to pass
    # keywords in a consistent order to get a stable cache hash -- confirm against Hasher.
    for obj in kwargs.values():
        m.update(obj)
    return m.hexdigest()
605605
606606
@@ -2231,15 +2231,25 @@ def load_dataset_builder(
22312231 error_msg += f'\n For example `data_files={{"train": "path/to/data/train/*.{ example_extensions [0 ]} "}}`'
22322232 raise ValueError (error_msg )
22332233
2234+ hash = dataset_module .hash
22342235 builder_cls = get_dataset_builder_class (dataset_module , dataset_name = dataset_name )
2236+ if len (builder_cls .builder_configs ) > 1 :
2237+ builder_config = builder_cls .builder_configs .get (config_name or builder_cls .DEFAULT_CONFIG_NAME )
2238+ elif len (builder_cls .builder_configs ) == 1 :
2239+ builder_config = builder_cls .BUILDER_CONFIGS [0 ]
2240+ else :
2241+ builder_config = None
2242+ if builder_config and builder_config .data_files is not None :
2243+ builder_config ._resolve_data_files (base_path = builder_kwargs ["base_path" ], download_config = download_config )
2244+ hash = update_hash_for_cache (hash , data_files = builder_config .data_files )
22352245 # Instantiate the dataset builder
22362246 builder_instance : DatasetBuilder = builder_cls (
22372247 cache_dir = cache_dir ,
22382248 dataset_name = dataset_name ,
22392249 config_name = config_name ,
22402250 data_dir = data_dir ,
22412251 data_files = data_files ,
2242- hash = dataset_module . hash ,
2252+ hash = hash ,
22432253 info = info ,
22442254 features = features ,
22452255 token = token ,
0 commit comments