-
Notifications
You must be signed in to change notification settings - Fork 3k
Fix unused DatasetInfosDict code in push_to_hub #6042
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1677,23 +1677,12 @@ def push_to_hub( | |
| ) | ||
| dataset_card = DatasetCard.load(Path(dataset_readme_path)) | ||
| dataset_card_data = dataset_card.data | ||
| dataset_infos: DatasetInfosDict = DatasetInfosDict.from_dataset_card_data(dataset_card_data) | ||
| metadata_configs = MetadataConfigs.from_dataset_card_data(dataset_card_data) | ||
| # get the deprecated dataset_infos.json to update them | ||
| elif config.DATASETDICT_INFOS_FILENAME in repo_files: | ||
| dataset_card = None | ||
| dataset_card_data = DatasetCardData() | ||
| metadata_configs = MetadataConfigs() | ||
| download_config = DownloadConfig() | ||
| download_config.download_desc = "Downloading metadata" | ||
| download_config.token = token | ||
| dataset_infos_path = cached_path( | ||
| hf_hub_url(repo_id, config.DATASETDICT_INFOS_FILENAME), | ||
| download_config=download_config, | ||
| ) | ||
| with open(dataset_infos_path, encoding="utf-8") as f: | ||
| dataset_infos: dict = json.load(f) | ||
| dataset_infos.get(config_name, None) if dataset_infos else None | ||
| else: | ||
| dataset_card = None | ||
| dataset_card_data = DatasetCardData() | ||
|
|
@@ -1722,8 +1711,15 @@ def push_to_hub( | |
| MetadataConfigs({"default": default_metadata_configs_to_dump}).to_dataset_card_data(dataset_card_data) | ||
| # push to the deprecated dataset_infos.json | ||
| if config.DATASETDICT_INFOS_FILENAME in repo_files: | ||
| download_config = DownloadConfig() | ||
| download_config.download_desc = "Downloading metadata" | ||
| download_config.token = token | ||
| dataset_infos_path = cached_path( | ||
| hf_hub_url(repo_id, config.DATASETDICT_INFOS_FILENAME), | ||
| download_config=download_config, | ||
| ) | ||
|
Comment on lines
+1714
to
+1720
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This code needs to be here because, in its previous location, it would run only if there was no README.md and there was a JSON file. But it needs to run whenever there is a JSON file, regardless of whether there is a README.md.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah lol, true, good catch! |
||
| with open(dataset_infos_path, encoding="utf-8") as f: | ||
| dataset_infos: DatasetInfosDict = json.load(f) | ||
| dataset_infos: dict = json.load(f) | ||
| dataset_infos[config_name] = asdict(info_to_dump) | ||
| buffer = BytesIO() | ||
| buffer.write(json.dumps(dataset_infos, indent=4).encode("utf-8")) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not used, so I removed this line.