@@ -211,19 +211,45 @@ fn write_leaf(
211211 let indices = levels. filter_array_indices ( ) ;
212212 let written = match writer {
213213 ColumnWriter :: Int32ColumnWriter ( ref mut typed) => {
214- // If the column is a Date64, we cast it to a Date32, and then interpret that as Int32
215- let array = if let ArrowDataType :: Date64 = column. data_type ( ) {
216- let array = arrow:: compute:: cast ( column, & ArrowDataType :: Date32 ) ?;
217- arrow:: compute:: cast ( & array, & ArrowDataType :: Int32 ) ?
218- } else {
219- arrow:: compute:: cast ( column, & ArrowDataType :: Int32 ) ?
214+ let values = match column. data_type ( ) {
215+ ArrowDataType :: Date64 => {
216+ // If the column is a Date64, we cast it to a Date32, and then interpret that as Int32
217+ let array = if let ArrowDataType :: Date64 = column. data_type ( ) {
218+ let array = arrow:: compute:: cast ( column, & ArrowDataType :: Date32 ) ?;
219+ arrow:: compute:: cast ( & array, & ArrowDataType :: Int32 ) ?
220+ } else {
221+ arrow:: compute:: cast ( column, & ArrowDataType :: Int32 ) ?
222+ } ;
223+ let array = array
224+ . as_any ( )
225+ . downcast_ref :: < arrow_array:: Int32Array > ( )
226+ . expect ( "Unable to get int32 array" ) ;
227+ get_numeric_array_slice :: < Int32Type , _ > ( & array, & indices)
228+ }
229+ ArrowDataType :: UInt32 => {
230+ // follow C++ implementation and use overflow/reinterpret cast from u32 to i32 which will map
231+ // `(i32::MAX as u32)..u32::MAX` to `i32::MIN..0`
232+ let array = column
233+ . as_any ( )
234+ . downcast_ref :: < arrow_array:: UInt32Array > ( )
235+ . expect ( "Unable to get u32 array" ) ;
236+ let array = arrow:: compute:: unary :: < _ , _ , arrow:: datatypes:: Int32Type > (
237+ array,
238+ |x| x as i32 ,
239+ ) ;
240+ get_numeric_array_slice :: < Int32Type , _ > ( & array, & indices)
241+ }
242+ _ => {
243+ let array = arrow:: compute:: cast ( column, & ArrowDataType :: Int32 ) ?;
244+ let array = array
245+ . as_any ( )
246+ . downcast_ref :: < arrow_array:: Int32Array > ( )
247+ . expect ( "Unable to get i32 array" ) ;
248+ get_numeric_array_slice :: < Int32Type , _ > ( & array, & indices)
249+ }
220250 } ;
221- let array = array
222- . as_any ( )
223- . downcast_ref :: < arrow_array:: Int32Array > ( )
224- . expect ( "Unable to get int32 array" ) ;
225251 typed. write_batch (
226- get_numeric_array_slice :: < Int32Type , _ > ( & array , & indices ) . as_slice ( ) ,
252+ values . as_slice ( ) ,
227253 Some ( levels. definition . as_slice ( ) ) ,
228254 levels. repetition . as_deref ( ) ,
229255 ) ?
@@ -1469,6 +1495,37 @@ mod tests {
14691495 ) ;
14701496 }
14711497
/// Round-trips a `UInt32Array` whose values sit on both sides of `i32::MAX`
/// through `one_column_roundtrip`, then checks that the resulting file holds a
/// single row group with a single column whose `Statistics::Int32` min/max,
/// cast back to `u32`, equal `u32::MIN` and `u32::MAX`.
#[test]
fn u32_min_max() {
    // Both ends of the u32 range plus the values straddling i32::MAX.
    let boundary = i32::MAX as u32;
    let input = vec![
        u32::MIN,
        u32::MIN + 1,
        boundary - 1,
        boundary,
        boundary + 1,
        u32::MAX - 1,
        u32::MAX,
    ];
    let values = Arc::new(UInt32Array::from_iter_values(input));
    let file = one_column_roundtrip("u32_min_max_single_column", values, false);

    // Re-open the written file and locate the only column chunk.
    let reader = SerializedFileReader::new(file).unwrap();
    let metadata = reader.metadata();
    assert_eq!(metadata.num_row_groups(), 1);
    let row_group = metadata.row_group(0);
    assert_eq!(row_group.num_columns(), 1);
    let column = row_group.column(0);

    // Min/max must be present and, viewed as u32, span the whole input range.
    let stats = column.statistics().unwrap();
    assert!(stats.has_min_max_set());
    match stats {
        Statistics::Int32(int_stats) => {
            assert_eq!(*int_stats.min() as u32, u32::MIN);
            assert_eq!(*int_stats.max() as u32, u32::MAX);
        }
        _ => panic!("Statistics::Int32 missing"),
    }
}
1528+
14721529 #[ test]
14731530 fn u64_min_max ( ) {
14741531 // check values roundtrip through parquet
0 commit comments