@@ -29,8 +29,9 @@ use arrow_array::{
2929 TimestampNanosecondBufferBuilder , TimestampSecondBufferBuilder ,
3030 } ,
3131 ArrayRef , BooleanArray , Decimal128Array , Decimal256Array , Float32Array , Float64Array ,
32- Int32Array , Int64Array , TimestampMicrosecondArray , TimestampMillisecondArray ,
33- TimestampNanosecondArray , TimestampSecondArray , UInt32Array , UInt64Array ,
32+ Int16Array , Int32Array , Int64Array , Int8Array , TimestampMicrosecondArray ,
33+ TimestampMillisecondArray , TimestampNanosecondArray , TimestampSecondArray , UInt16Array ,
34+ UInt32Array , UInt64Array , UInt8Array ,
3435} ;
3536use arrow_buffer:: { i256, BooleanBuffer , Buffer } ;
3637use arrow_data:: ArrayDataBuilder ;
@@ -261,6 +262,45 @@ where
261262 // - date64: cast int32 to date32, then date32 to date64.
262263 // - decimal: cast int32 to decimal, int64 to decimal
263264 let array = match target_type {
265+ // Using `arrow_cast::cast` has been found to be very slow for converting
266+ // the INT32 physical type to lower-bitwidth logical types. Since Rust `as`
267+ // casts are infallible, use `unary` instead, which is much faster (by up to 40%).
268+ // One consequence of this approach is that out-of-range values in malformed
269+ // integer columns are truncated to an (arguably correct) result rather than null.
270+ // See https://github.com/apache/arrow-rs/issues/7040 for a discussion of this
271+ // issue.
272+ ArrowType :: UInt8 if * ( array. data_type ( ) ) == ArrowType :: Int32 => {
273+ let array = array
274+ . as_any ( )
275+ . downcast_ref :: < Int32Array > ( )
276+ . unwrap ( )
277+ . unary ( |i| i as u8 ) as UInt8Array ;
278+ Arc :: new ( array) as ArrayRef
279+ }
280+ ArrowType :: Int8 if * ( array. data_type ( ) ) == ArrowType :: Int32 => {
281+ let array = array
282+ . as_any ( )
283+ . downcast_ref :: < Int32Array > ( )
284+ . unwrap ( )
285+ . unary ( |i| i as i8 ) as Int8Array ;
286+ Arc :: new ( array) as ArrayRef
287+ }
288+ ArrowType :: UInt16 if * ( array. data_type ( ) ) == ArrowType :: Int32 => {
289+ let array = array
290+ . as_any ( )
291+ . downcast_ref :: < Int32Array > ( )
292+ . unwrap ( )
293+ . unary ( |i| i as u16 ) as UInt16Array ;
294+ Arc :: new ( array) as ArrayRef
295+ }
296+ ArrowType :: Int16 if * ( array. data_type ( ) ) == ArrowType :: Int32 => {
297+ let array = array
298+ . as_any ( )
299+ . downcast_ref :: < Int32Array > ( )
300+ . unwrap ( )
301+ . unary ( |i| i as i16 ) as Int16Array ;
302+ Arc :: new ( array) as ArrayRef
303+ }
264304 ArrowType :: Date64 if * ( array. data_type ( ) ) == ArrowType :: Int32 => {
265305 // this is cheap as it internally reinterprets the data
266306 let a = arrow_cast:: cast ( & array, & ArrowType :: Date32 ) ?;
0 commit comments