99import pyarrow .compute as pc
1010import pyarrow .types
1111
12- from . import config
1312from .utils .logging import get_logger
1413
1514
@@ -1320,22 +1319,16 @@ def __setstate__(self, state):
13201319 if schema is not None and table .schema != schema :
13211320 # We fix the columns by concatenating with an empty table with the right columns
13221321 empty_table = pa .Table .from_batches ([], schema = schema )
1323- # we set promote=True to fill missing columns with null values
1324- if config .PYARROW_VERSION .major < 14 :
1325- table = pa .concat_tables ([table , empty_table ], promote = True )
1326- else :
1327- table = pa .concat_tables ([table , empty_table ], promote_options = "default" )
1322+ # We set promote_options="default" to fill missing columns with null values
1323+ table = pa .concat_tables ([table , empty_table ], promote_options = "default" )
13281324 ConcatenationTable .__init__ (self , table , blocks = blocks )
13291325
13301326 @staticmethod
13311327 def _concat_blocks (blocks : List [Union [TableBlock , pa .Table ]], axis : int = 0 ) -> pa .Table :
13321328 pa_tables = [table .table if hasattr (table , "table" ) else table for table in blocks ]
13331329 if axis == 0 :
1334- # we set promote=True to fill missing columns with null values
1335- if config .PYARROW_VERSION .major < 14 :
1336- return pa .concat_tables (pa_tables , promote = True )
1337- else :
1338- return pa .concat_tables (pa_tables , promote_options = "default" )
1330+ # We set promote_options="default" to fill missing columns with null values
1331+ return pa .concat_tables (pa_tables , promote_options = "default" )
13391332 elif axis == 1 :
13401333 for i , table in enumerate (pa_tables ):
13411334 if i == 0 :
@@ -1906,17 +1899,9 @@ def array_cast(
19061899 else :
19071900 array = pc .list_slice (array , 0 , pa_type .list_size , return_fixed_size_list = True )
19081901 array_values = array .values
1909- if config .PYARROW_VERSION .major < 15 :
1910- return pa .Array .from_buffers (
1911- pa_type ,
1912- len (array ),
1913- [array .is_valid ().buffers ()[1 ]],
1914- children = [_c (array_values , pa_type .value_type )],
1915- )
1916- else :
1917- return pa .FixedSizeListArray .from_arrays (
1918- _c (array_values , pa_type .value_type ), pa_type .list_size , mask = array .is_null ()
1919- )
1902+ return pa .FixedSizeListArray .from_arrays (
1903+ _c (array_values , pa_type .value_type ), pa_type .list_size , mask = array .is_null ()
1904+ )
19201905 else :
19211906 array_values = array .values [
19221907 array .offset * pa_type .list_size : (array .offset + len (array )) * pa_type .list_size
@@ -1932,17 +1917,9 @@ def array_cast(
19321917 array_values = array .values [
19331918 array .offset * array .type .list_size : (array .offset + len (array )) * array .type .list_size
19341919 ]
1935- if config .PYARROW_VERSION .major < 15 :
1936- return pa .Array .from_buffers (
1937- pa_type ,
1938- len (array ),
1939- [array .is_valid ().buffers ()[1 ]],
1940- children = [_c (array_values , pa_type .value_type )],
1941- )
1942- else :
1943- return pa .FixedSizeListArray .from_arrays (
1944- _c (array_values , pa_type .value_type ), pa_type .list_size , mask = array .is_null ()
1945- )
1920+ return pa .FixedSizeListArray .from_arrays (
1921+ _c (array_values , pa_type .value_type ), pa_type .list_size , mask = array .is_null ()
1922+ )
19461923 elif pa .types .is_list (pa_type ):
19471924 array_offsets = (np .arange (len (array ) + 1 ) + array .offset ) * array .type .list_size
19481925 return pa .ListArray .from_arrays (array_offsets , _c (array .values , pa_type .value_type ), mask = array .is_null ())
@@ -2055,17 +2032,9 @@ def cast_array_to_feature(
20552032 array = pc .list_slice (array , 0 , feature .length , return_fixed_size_list = True )
20562033 array_values = array .values
20572034 casted_array_values = _c (array_values , feature .feature )
2058- if config .PYARROW_VERSION .major < 15 :
2059- return pa .Array .from_buffers (
2060- pa .list_ (casted_array_values .type , feature .length ),
2061- len (array ),
2062- [array .is_valid ().buffers ()[1 ]],
2063- children = [casted_array_values ],
2064- )
2065- else :
2066- return pa .FixedSizeListArray .from_arrays (
2067- casted_array_values , feature .length , mask = array .is_null ()
2068- )
2035+ return pa .FixedSizeListArray .from_arrays (
2036+ casted_array_values , feature .length , mask = array .is_null ()
2037+ )
20692038 else :
20702039 array_values = array .values [
20712040 array .offset * feature .length : (array .offset + len (array )) * feature .length
@@ -2091,17 +2060,7 @@ def cast_array_to_feature(
20912060 array .offset * array .type .list_size : (array .offset + len (array )) * array .type .list_size
20922061 ]
20932062 casted_array_values = _c (array_values , feature .feature )
2094- if config .PYARROW_VERSION .major < 15 :
2095- return pa .Array .from_buffers (
2096- pa .list_ (casted_array_values .type , feature .length ),
2097- len (array ),
2098- [array .is_valid ().buffers ()[1 ]],
2099- children = [casted_array_values ],
2100- )
2101- else :
2102- return pa .FixedSizeListArray .from_arrays (
2103- casted_array_values , feature .length , mask = array .is_null ()
2104- )
2063+ return pa .FixedSizeListArray .from_arrays (casted_array_values , feature .length , mask = array .is_null ())
21052064 else :
21062065 array_offsets = (np .arange (len (array ) + 1 ) + array .offset ) * array .type .list_size
21072066 return pa .ListArray .from_arrays (array_offsets , _c (array .values , feature .feature ), mask = array .is_null ())
@@ -2176,15 +2135,7 @@ def embed_array_storage(array: pa.Array, feature: "FeatureType"):
21762135 array .offset * array .type .list_size : (array .offset + len (array )) * array .type .list_size
21772136 ]
21782137 embedded_array_values = _e (array_values , feature .feature )
2179- if config .PYARROW_VERSION .major < 15 :
2180- return pa .Array .from_buffers (
2181- pa .list_ (array_values .type , feature .length ),
2182- len (array ),
2183- [array .is_valid ().buffers ()[1 ]],
2184- children = [embedded_array_values ],
2185- )
2186- else :
2187- return pa .FixedSizeListArray .from_arrays (embedded_array_values , feature .length , mask = array .is_null ())
2138+ return pa .FixedSizeListArray .from_arrays (embedded_array_values , feature .length , mask = array .is_null ())
21882139 if not isinstance (feature , (Sequence , dict , list , tuple )):
21892140 return array
21902141 raise TypeError (f"Couldn't embed array of type\n { _short_str (array .type )} \n with\n { _short_str (feature )} " )
0 commit comments