diff --git a/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py b/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py index 6c96d402aa1..e2277ba24e4 100644 --- a/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py +++ b/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py @@ -6,7 +6,6 @@ import pandas as pd import pyarrow as pa -import pyarrow.compute as pc import pyarrow.json as paj import datasets @@ -310,13 +309,10 @@ def _generate_examples(self, files, metadata_files, split_name, add_metadata, ad ) pa_metadata_table = self._read_metadata(downloaded_metadata_file) pa_file_name_array = pa_metadata_table["file_name"] - pa_file_name_array = pc.replace_substring( - pa_file_name_array, pattern="\\", replacement="/" - ) pa_metadata_table = pa_metadata_table.drop(["file_name"]) metadata_dir = os.path.dirname(metadata_file) metadata_dict = { - file_name: sample_metadata + os.path.normpath(file_name).replace("\\", "/"): sample_metadata for file_name, sample_metadata in zip( pa_file_name_array.to_pylist(), pa_table_to_pylist(pa_metadata_table) ) @@ -379,13 +375,10 @@ def _generate_examples(self, files, metadata_files, split_name, add_metadata, ad ) pa_metadata_table = self._read_metadata(downloaded_metadata_file) pa_file_name_array = pa_metadata_table["file_name"] - pa_file_name_array = pc.replace_substring( - pa_file_name_array, pattern="\\", replacement="/" - ) pa_metadata_table = pa_metadata_table.drop(["file_name"]) metadata_dir = os.path.dirname(downloaded_metadata_file) metadata_dict = { - file_name: sample_metadata + os.path.normpath(file_name).replace("\\", "/"): sample_metadata for file_name, sample_metadata in zip( pa_file_name_array.to_pylist(), pa_table_to_pylist(pa_metadata_table) ) diff --git a/tests/packaged_modules/test_folder_based_builder.py b/tests/packaged_modules/test_folder_based_builder.py index 34fe3a62db7..5dbc59b3fca 100644 --- a/tests/packaged_modules/test_folder_based_builder.py +++ b/tests/packaged_modules/test_folder_based_builder.py @@ -132,7 +132,7 @@ def data_files_with_one_split_and_metadata(tmp_path, auto_text_file): """\ {"file_name": "file.txt", "additional_feature": "Dummy file"} {"file_name": "file2.txt", "additional_feature": "Second dummy file"} - {"file_name": "subdir/file3.txt", "additional_feature": "Third dummy file"} + {"file_name": "./subdir/file3.txt", "additional_feature": "Third dummy file"} """ ) with open(metadata_filename, "w", encoding="utf-8") as f: