from datasets import Dataset, DatasetDict, Features, NamedSplit, Value
from datasets.io.parquet import ParquetDatasetReader, ParquetDatasetWriter

-from ..utils import assert_arrow_memory_doesnt_increase, assert_arrow_memory_increases
+from ..utils import assert_arrow_memory_doesnt_increase, assert_arrow_memory_increases, require_pyarrow_at_least_3


def _check_parquet_dataset(dataset, expected_features):
@@ -16,6 +16,7 @@ def _check_parquet_dataset(dataset, expected_features):
        assert dataset.features[feature].dtype == expected_dtype


+@require_pyarrow_at_least_3
@pytest.mark.parametrize("keep_in_memory", [False, True])
def test_dataset_from_parquet_keep_in_memory(keep_in_memory, parquet_path, tmp_path):
    cache_dir = tmp_path / "cache"
@@ -25,6 +26,7 @@ def test_dataset_from_parquet_keep_in_memory(keep_in_memory, parquet_path, tmp_p
    _check_parquet_dataset(dataset, expected_features)


+@require_pyarrow_at_least_3
@pytest.mark.parametrize(
    "features",
    [
@@ -46,6 +48,7 @@ def test_dataset_from_parquet_features(features, parquet_path, tmp_path):
    _check_parquet_dataset(dataset, expected_features)


+@require_pyarrow_at_least_3
@pytest.mark.parametrize("split", [None, NamedSplit("train"), "train", "test"])
def test_dataset_from_parquet_split(split, parquet_path, tmp_path):
    cache_dir = tmp_path / "cache"
@@ -55,6 +58,7 @@ def test_dataset_from_parquet_split(split, parquet_path, tmp_path):
    assert dataset.split == (str(split) if split else "train")


+@require_pyarrow_at_least_3
@pytest.mark.parametrize("path_type", [str, list])
def test_dataset_from_parquet_path_type(path_type, parquet_path, tmp_path):
    if issubclass(path_type, str):
@@ -78,6 +82,7 @@ def _check_parquet_datasetdict(dataset_dict, expected_features, splits=("train",
            assert dataset.features[feature].dtype == expected_dtype


+@require_pyarrow_at_least_3
@pytest.mark.parametrize("keep_in_memory", [False, True])
def test_parquet_datasetdict_reader_keep_in_memory(keep_in_memory, parquet_path, tmp_path):
    cache_dir = tmp_path / "cache"
@@ -89,6 +94,7 @@ def test_parquet_datasetdict_reader_keep_in_memory(keep_in_memory, parquet_path,
    _check_parquet_datasetdict(dataset, expected_features)


+@require_pyarrow_at_least_3
@pytest.mark.parametrize(
    "features",
    [
@@ -110,6 +116,7 @@ def test_parquet_datasetdict_reader_features(features, parquet_path, tmp_path):
    _check_parquet_datasetdict(dataset, expected_features)


+@require_pyarrow_at_least_3
@pytest.mark.parametrize("split", [None, NamedSplit("train"), "train", "test"])
def test_parquet_datasetdict_reader_split(split, parquet_path, tmp_path):
    if split:
@@ -124,6 +131,7 @@ def test_parquet_datasetdict_reader_split(split, parquet_path, tmp_path):
    assert all(dataset[split].split == split for split in path.keys())


+@require_pyarrow_at_least_3
def test_parquet_write(dataset, tmp_path):
    writer = ParquetDatasetWriter(dataset, tmp_path / "foo.parquet")
    assert writer.write() > 0
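
Note: `require_pyarrow_at_least_3` is imported from `..utils` but its definition is outside this diff. A minimal sketch of what such a guard could look like, assuming it is a `pytest.mark.skipif` marker defined in the test utilities (the version check shown here is an assumption, not the library's confirmed implementation):

import pytest
import pyarrow

# Hypothetical sketch: skip the decorated test when the installed
# PyArrow major version is below 3 (parsed once at import time).
require_pyarrow_at_least_3 = pytest.mark.skipif(
    int(pyarrow.__version__.split(".")[0]) < 3,
    reason="test requires PyArrow >= 3.0.0",
)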
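For orientation, a hedged round-trip sketch of the reader/writer API these tests exercise (the example data and temporary-directory handling are illustrative, not taken from the diff):

import tempfile
from pathlib import Path

from datasets import Dataset
from datasets.io.parquet import ParquetDatasetReader, ParquetDatasetWriter

# Write a small in-memory dataset to Parquet, then read it back.
dataset = Dataset.from_dict({"col_1": [0, 1, 2, 3], "col_2": ["a", "b", "c", "d"]})
with tempfile.TemporaryDirectory() as tmp_dir:
    parquet_path = str(Path(tmp_dir) / "foo.parquet")
    assert ParquetDatasetWriter(dataset, parquet_path).write() > 0
    reloaded = ParquetDatasetReader(parquet_path, cache_dir=tmp_dir).read()
    assert reloaded.column_names == dataset.column_names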