diff --git a/setup.py b/setup.py index b9e930fe804..4fce8ec5e48 100644 --- a/setup.py +++ b/setup.py @@ -175,7 +175,7 @@ "pyspark>=3.4", # https://issues.apache.org/jira/browse/SPARK-40991 fixed in 3.4.0 "py7zr", "rarfile>=4.0", - "sqlalchemy<2.0.0", + "sqlalchemy", "s3fs>=2021.11.1", # aligned with fsspec[http]>=2021.11.1; test only on python 3.7 for now "tensorflow>=2.3,!=2.6.0,!=2.6.1; sys_platform != 'darwin' or platform_machine != 'arm64'", "tensorflow-macos; sys_platform == 'darwin' and platform_machine == 'arm64'", diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py index 344a479612a..20b128c8e52 100644 --- a/src/datasets/features/features.py +++ b/src/datasets/features/features.py @@ -370,7 +370,7 @@ def _cast_to_python_objects(obj: Any, only_1d_for_numpy: bool, optimize_list_cas key: _cast_to_python_objects( value, only_1d_for_numpy=only_1d_for_numpy, optimize_list_casting=optimize_list_casting )[0] - for key, value in obj.to_dict("list").items() + for key, value in obj.to_dict("series").items() }, True, ) diff --git a/src/datasets/table.py b/src/datasets/table.py index 763716b6415..de896218191 100644 --- a/src/datasets/table.py +++ b/src/datasets/table.py @@ -2076,9 +2076,6 @@ def cast_array_to_feature(array: pa.Array, feature: "FeatureType", allow_number_ elif pa.types.is_fixed_size_list(array.type): # feature must be either [subfeature] or Sequence(subfeature) array_values = array.values - if config.PYARROW_VERSION.major < 15: - # PyArrow bug: https://github.com/apache/arrow/issues/35360 - array_values = array.values[array.offset * array.type.list_size :] if isinstance(feature, list): if array.null_count > 0: if config.PYARROW_VERSION.major < 10: @@ -2090,6 +2087,10 @@ def cast_array_to_feature(array: pa.Array, feature: "FeatureType", allow_number_ return pa.ListArray.from_arrays(array.offsets, _c(array_values, feature[0])) elif isinstance(feature, Sequence): if feature.length > -1: + if array.offset and feature.length * len(array) != len(array_values): + array_values = array.values[ + array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size + ] if feature.length * len(array) == len(array_values): return pa.FixedSizeListArray.from_arrays(_c(array_values, feature.feature), feature.length) else: diff --git a/tests/conftest.py b/tests/conftest.py index bcae5b1da1d..c6770ddac8c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -51,7 +51,10 @@ def set_update_download_counts_to_false(monkeypatch): def set_sqlalchemy_silence_uber_warning(monkeypatch): # Required to suppress RemovedIn20Warning when feature(s) are not compatible with SQLAlchemy 2.0 # To be removed once SQLAlchemy 2.0 supported - monkeypatch.setattr("sqlalchemy.util.deprecations.SILENCE_UBER_WARNING", True) + try: + monkeypatch.setattr("sqlalchemy.util.deprecations.SILENCE_UBER_WARNING", True) + except AttributeError: + pass @pytest.fixture(autouse=True, scope="session")