1616# Lint as: python3
1717""" This class handles feature definitions in datasets and provides some utilities to display table types."""
1818import copy
19+ import json
1920import re
2021import sys
2122from collections .abc import Iterable
3031import pandas as pd
3132import pyarrow as pa
3233import pyarrow .types
34+ import pyarrow_hotfix # noqa: F401 # to fix vulnerability on pyarrow<14.0.1
3335from pandas .api .extensions import ExtensionArray as PandasExtensionArray
3436from pandas .api .extensions import ExtensionDtype as PandasExtensionDtype
3537from pyarrow .lib import TimestampType
@@ -353,7 +355,7 @@ class Array5D(_ArrayXD):
353355 _type : str = field (default = "Array5D" , init = False , repr = False )
354356
355357
356- class _ArrayXDExtensionType (pa .PyExtensionType ):
358+ class _ArrayXDExtensionType (pa .ExtensionType ):
357359 ndims : Optional [int ] = None
358360
359361 def __init__ (self , shape : tuple , dtype : str ):
@@ -364,13 +366,18 @@ def __init__(self, shape: tuple, dtype: str):
364366 self .shape = tuple (shape )
365367 self .value_type = dtype
366368 self .storage_dtype = self ._generate_dtype (self .value_type )
367- pa .PyExtensionType .__init__ (self , self .storage_dtype )
369+ pa .ExtensionType .__init__ (self , self .storage_dtype , f"{ self .__class__ .__module__ } .{ self .__class__ .__name__ } " )
370+
371+ def __arrow_ext_serialize__ (self ):
372+ return json .dumps ((self .shape , self .value_type )).encode ()
373+
374+ @classmethod
375+ def __arrow_ext_deserialize__ (cls , storage_type , serialized ):
376+ args = json .loads (serialized )
377+ return cls (* args )
368378
369379 def __reduce__ (self ):
370- return self .__class__ , (
371- self .shape ,
372- self .value_type ,
373- )
380+ return self .__arrow_ext_deserialize__ , (self .storage_type , self .__arrow_ext_serialize__ ())
374381
375382 def __arrow_ext_class__ (self ):
376383 return ArrayExtensionArray
@@ -403,6 +410,13 @@ class Array5DExtensionType(_ArrayXDExtensionType):
403410 ndims = 5
404411
405412
413+ # Register the extension types for deserialization
414+ pa .register_extension_type (Array2DExtensionType ((1 , 2 ), "int64" ))
415+ pa .register_extension_type (Array3DExtensionType ((1 , 2 , 3 ), "int64" ))
416+ pa .register_extension_type (Array4DExtensionType ((1 , 2 , 3 , 4 ), "int64" ))
417+ pa .register_extension_type (Array5DExtensionType ((1 , 2 , 3 , 4 , 5 ), "int64" ))
418+
419+
406420def _is_zero_copy_only (pa_type : pa .DataType ) -> bool :
407421 """
408422 When converting a pyarrow array to a numpy array, we must know whether this could be done in zero-copy or not.
0 commit comments