```diff
@@ -11,6 +11,7 @@
 
 from datasets.utils.doc_utils import is_documented_by
 
+from . import config
 from .arrow_dataset import Dataset
 from .features import Features
 from .filesystems import extract_path_from_uri, is_remote_filesystem
@@ -673,7 +674,7 @@ def save_to_disk(self, dataset_dict_path: str, fs=None):
 
         json.dump(
             {"splits": list(self)},
-            fs.open(Path(dest_dataset_dict_path, "dataset_dict.json").as_posix(), "w", encoding="utf-8"),
+            fs.open(Path(dest_dataset_dict_path, config.DATASETDICT_JSON_FILENAME).as_posix(), "w", encoding="utf-8"),
         )
         for k, dataset in self.items():
             dataset.save_to_disk(Path(dest_dataset_dict_path, k).as_posix(), fs)
@@ -706,8 +707,14 @@ def load_from_disk(dataset_dict_path: str, fs=None, keep_in_memory: Optional[boo
         else:
             fs = fsspec.filesystem("file")
             dest_dataset_dict_path = dataset_dict_path
+        dataset_dict_json_path = Path(dest_dataset_dict_path, config.DATASETDICT_JSON_FILENAME).as_posix()
+        dataset_info_path = Path(dest_dataset_dict_path, config.DATASET_INFO_FILENAME).as_posix()
+        if fs.isfile(dataset_info_path) and not fs.isfile(dataset_dict_json_path):
+            raise FileNotFoundError(
+                f"No such file or directory: '{dataset_dict_json_path}'. Looks like you tried to load a Dataset object, not a DatasetDict. Please use Dataset.load_from_disk instead."
+            )
         for k in json.load(
-            fs.open(Path(dest_dataset_dict_path, "dataset_dict.json").as_posix(), "r", encoding="utf-8")
+            fs.open(Path(dest_dataset_dict_path, config.DATASETDICT_JSON_FILENAME).as_posix(), "r", encoding="utf-8")
         )["splits"]:
             dataset_dict_split_path = (
                 dataset_dict_path.split("://")[0] + "://" + Path(dest_dataset_dict_path, k).as_posix()
```
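Taken together, the change makes `DatasetDict.load_from_disk` fail fast with an actionable message when pointed at a directory written by `Dataset.save_to_disk`, which contains `dataset_info.json` (and a state file) but no `dataset_dict.json`. A minimal sketch of the resulting behavior on a local filesystem; the `my_dataset` path and the toy data are illustrative, not from the patch:

```python
from datasets import Dataset, DatasetDict

# Saving a single Dataset writes dataset_info.json but no dataset_dict.json.
Dataset.from_dict({"text": ["a", "b"]}).save_to_disk("my_dataset")

try:
    # Wrong loader for this directory: DatasetDict.load_from_disk expects
    # a dataset_dict.json listing the splits.
    DatasetDict.load_from_disk("my_dataset")
except FileNotFoundError as err:
    # "... Looks like you tried to load a Dataset object, not a DatasetDict.
    #  Please use Dataset.load_from_disk instead."
    print(err)

# The matching loader works as before.
ds = Dataset.load_from_disk("my_dataset")
```

Without the new `fs.isfile` check, the same mistake surfaced as a bare "No such file or directory" error on `dataset_dict.json`, leaving the user to work out which loader to call.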