@@ -313,23 +313,24 @@ def copy(self) -> "DatasetInfo":
313313
314314 def _to_yaml_dict (self ) -> dict :
315315 yaml_dict = {}
316- for field in dataclasses .fields (self ):
317- if field .name in self ._INCLUDED_INFO_IN_YAML :
318- value = getattr (self , field .name )
316+ dataset_info_dict = asdict (self )
317+ for key in dataset_info_dict :
318+ if key in self ._INCLUDED_INFO_IN_YAML :
319+ value = getattr (self , key )
319320 if hasattr (value , "_to_yaml_list" ): # Features, SplitDict
320- yaml_dict [field . name ] = value ._to_yaml_list ()
321+ yaml_dict [key ] = value ._to_yaml_list ()
321322 elif hasattr (value , "_to_yaml_string" ): # Version
322- yaml_dict [field . name ] = value ._to_yaml_string ()
323+ yaml_dict [key ] = value ._to_yaml_string ()
323324 else :
324- yaml_dict [field . name ] = value
325+ yaml_dict [key ] = value
325326 return yaml_dict
326327
327328 @classmethod
328329 def _from_yaml_dict (cls , yaml_data : dict ) -> "DatasetInfo" :
329330 yaml_data = copy .deepcopy (yaml_data )
330- if "features" in yaml_data :
331+ if yaml_data . get ( "features" ) is not None :
331332 yaml_data ["features" ] = Features ._from_yaml_list (yaml_data ["features" ])
332- if "splits" in yaml_data :
333+ if yaml_data . get ( "splits" ) is not None :
333334 yaml_data ["splits" ] = SplitDict ._from_yaml_list (yaml_data ["splits" ])
334335 field_names = {f .name for f in dataclasses .fields (cls )}
335336 return cls (** {k : v for k , v in yaml_data .items () if k in field_names })
@@ -346,11 +347,10 @@ def write_to_directory(self, dataset_infos_dir, overwrite=False, pretty_print=Fa
346347 if os .path .exists (dataset_infos_path ):
347348 # for backward compatibility, let's update the JSON file if it exists
348349 with open (dataset_infos_path , "w" , encoding = "utf-8" ) as f :
349- json .dump (
350- {config_name : asdict (dset_info ) for config_name , dset_info in total_dataset_infos .items ()},
351- f ,
352- indent = 4 if pretty_print else None ,
353- )
350+ dataset_infos_dict = {
351+ config_name : asdict (dset_info ) for config_name , dset_info in total_dataset_infos .items ()
352+ }
353+ json .dump (dataset_infos_dict , f , indent = 4 if pretty_print else None )
354354 # Dump the infos in the YAML part of the README.md file
355355 if os .path .exists (dataset_readme_path ):
356356 dataset_metadata = DatasetMetadata .from_readme (Path (dataset_readme_path ))
@@ -365,6 +365,9 @@ def write_to_directory(self, dataset_infos_dir, overwrite=False, pretty_print=Fa
365365 dataset_metadata ["dataset_infos" ] = dataset_metadata ["dataset_infos" ][0 ]
366366 # no need to include the configuration name when there's only one configuration
367367 dataset_metadata ["dataset_infos" ].pop ("config_name" , None )
368+ else :
369+ for config_name , dataset_info_yaml_dict in zip (total_dataset_infos , dataset_metadata ["dataset_infos" ]):
370+ dataset_info_yaml_dict ["config_name" ] = config_name
368371 dataset_metadata .to_readme (Path (dataset_readme_path ))
369372
370373 @classmethod
@@ -383,7 +386,7 @@ def from_directory(cls, dataset_infos_dir):
383386 # Load the info from the YAML part of README.md
384387 if os .path .exists (os .path .join (dataset_infos_dir , "README.md" )):
385388 dataset_metadata = DatasetMetadata .from_readme (Path (dataset_infos_dir ) / "README.md" )
386- if isinstance (dataset_metadata .get ("dataset_infos" ), (list , dict )) and dataset_metadata [ "dataset_infos" ] :
389+ if isinstance (dataset_metadata .get ("dataset_infos" ), (list , dict )):
387390 if isinstance (dataset_metadata ["dataset_infos" ], list ):
388391 dataset_infos_dict = {
389392 dataset_info_yaml_dict .get ("config_name" , "default" ): DatasetInfo ._from_yaml_dict (
@@ -392,11 +395,10 @@ def from_directory(cls, dataset_infos_dir):
392395 for dataset_info_yaml_dict in dataset_metadata ["dataset_infos" ]
393396 }
394397 else :
395- dataset_infos_dict = {
396- dataset_metadata ["dataset_infos" ].get ("config_name" , "default" ): DatasetInfo ._from_yaml_dict (
397- dataset_metadata ["dataset_infos" ]
398- )
399- }
398+ dataset_info = DatasetInfo ._from_yaml_dict (dataset_metadata ["dataset_infos" ])
399+ dataset_info .config_name = dataset_metadata ["dataset_infos" ].get ("config_name" , "default" )
400+ dataset_infos_dict = {dataset_info .config_name : dataset_info }
401+
400402 return cls (** dataset_infos_dict )
401403
402404
0 commit comments