Append ".zip" to the extension list of every module in _MODULE_TO_EXTENSIONS
(previously only "imagefolder" and "audiofolder"), move the zip_csv_path
fixture to its own temp directory as "csv-dataset.zip", and add a test that
loads that directory without a script and checks the resulting "train" split.

diff --git a/src/datasets/packaged_modules/__init__.py b/src/datasets/packaged_modules/__init__.py
index c79b3762b38..dc9409040b0 100644
--- a/src/datasets/packaged_modules/__init__.py
+++ b/src/datasets/packaged_modules/__init__.py
@@ -60,5 +60,5 @@ def _hash_python_lines(lines: List[str]) -> str:
 for _ext, (_module, _) in _EXTENSION_TO_MODULE.items():
     _MODULE_TO_EXTENSIONS.setdefault(_module, []).append(_ext)
 
-_MODULE_TO_EXTENSIONS["imagefolder"].append(".zip")
-_MODULE_TO_EXTENSIONS["audiofolder"].append(".zip")
+for _module in _MODULE_TO_EXTENSIONS:
+    _MODULE_TO_EXTENSIONS[_module].append(".zip")
diff --git a/tests/fixtures/files.py b/tests/fixtures/files.py
index 3aebcff3bb7..da8d3efee48 100644
--- a/tests/fixtures/files.py
+++ b/tests/fixtures/files.py
@@ -289,7 +289,7 @@ def bz2_csv_path(csv_path, tmp_path_factory):
 
 @pytest.fixture(scope="session")
 def zip_csv_path(csv_path, csv2_path, tmp_path_factory):
-    path = tmp_path_factory.mktemp("data") / "dataset.csv.zip"
+    path = tmp_path_factory.mktemp("zip_csv_path") / "csv-dataset.zip"
     with zipfile.ZipFile(path, "w") as f:
         f.write(csv_path, arcname=os.path.basename(csv_path))
         f.write(csv2_path, arcname=os.path.basename(csv2_path))
diff --git a/tests/test_load.py b/tests/test_load.py
index 8542ace86f1..41ca010ab12 100644
--- a/tests/test_load.py
+++ b/tests/test_load.py
@@ -1458,3 +1458,12 @@ def test_load_dataset_with_storage_options_with_decoding(mockfs, image_file):
     ds = load_dataset("imagefolder", data_files=data_files, storage_options=mockfs.storage_options)
     assert len(ds["train"]) == 1
     assert isinstance(ds["train"][0]["image"], PIL.Image.Image)
+
+
+def test_load_dataset_without_script_with_zip(zip_csv_path):
+    path = str(zip_csv_path.parent)
+    ds = load_dataset(path)
+    assert list(ds.keys()) == ["train"]
+    assert ds["train"].column_names == ["col_1", "col_2", "col_3"]
+    assert ds["train"].num_rows == 8
+    assert ds["train"][0] == {"col_1": 0, "col_2": 0, "col_3": 0.0}