@@ -337,6 +337,14 @@ impl ParquetExecStream {
337337 file_metrics,
338338 ) ) ;
339339 }
340+ if let Some ( range) = & file. range {
341+ assert ! (
342+ range. start >= 0 && range. end > 0 && range. end > range. start,
343+ "invalid range specified: {:?}" ,
344+ range
345+ ) ;
346+ opt = opt. with_range ( range. start , range. end ) ;
347+ }
340348
341349 let file_reader = SerializedFileReader :: new_with_options (
342350 ChunkObjectReader ( object_reader) ,
@@ -649,13 +657,15 @@ mod tests {
649657 } ;
650658
651659 use super :: * ;
660+ use crate :: datasource:: listing:: FileRange ;
652661 use crate :: execution:: options:: CsvReadOptions ;
653662 use crate :: prelude:: { ParquetReadOptions , SessionConfig , SessionContext } ;
654663 use arrow:: array:: Float32Array ;
655664 use arrow:: {
656665 array:: { Int64Array , Int8Array , StringArray } ,
657666 datatypes:: { DataType , Field } ,
658667 } ;
668+ use datafusion_data_access:: object_store:: local;
659669 use datafusion_expr:: { col, lit} ;
660670 use futures:: StreamExt ;
661671 use parquet:: {
@@ -1099,6 +1109,81 @@ mod tests {
10991109 Ok ( ( ) )
11001110 }
11011111
1112+ #[ tokio:: test]
1113+ async fn parquet_exec_with_range ( ) -> Result < ( ) > {
1114+ fn file_range ( file : String , start : i64 , end : i64 ) -> PartitionedFile {
1115+ PartitionedFile {
1116+ file_meta : local:: local_unpartitioned_file ( file) ,
1117+ partition_values : vec ! [ ] ,
1118+ range : Some ( FileRange { start, end } ) ,
1119+ }
1120+ }
1121+
1122+ async fn assert_parquet_read (
1123+ file_groups : Vec < Vec < PartitionedFile > > ,
1124+ expected_row_num : Option < usize > ,
1125+ task_ctx : Arc < TaskContext > ,
1126+ file_schema : SchemaRef ,
1127+ ) -> Result < ( ) > {
1128+ let parquet_exec = ParquetExec :: new (
1129+ FileScanConfig {
1130+ object_store : Arc :: new ( LocalFileSystem { } ) ,
1131+ file_groups,
1132+ file_schema,
1133+ statistics : Statistics :: default ( ) ,
1134+ projection : None ,
1135+ limit : None ,
1136+ table_partition_cols : vec ! [ ] ,
1137+ } ,
1138+ None ,
1139+ ) ;
1140+ assert_eq ! ( parquet_exec. output_partitioning( ) . partition_count( ) , 1 ) ;
1141+ let results = parquet_exec. execute ( 0 , task_ctx) . await ?. next ( ) . await ;
1142+
1143+ if let Some ( expected_row_num) = expected_row_num {
1144+ let batch = results. unwrap ( ) ?;
1145+ assert_eq ! ( expected_row_num, batch. num_rows( ) ) ;
1146+ } else {
1147+ assert ! ( results. is_none( ) ) ;
1148+ }
1149+
1150+ Ok ( ( ) )
1151+ }
1152+
1153+ let session_ctx = SessionContext :: new ( ) ;
1154+ let testdata = crate :: test_util:: parquet_test_data ( ) ;
1155+ let filename = format ! ( "{}/alltypes_plain.parquet" , testdata) ;
1156+ let file_schema = ParquetFormat :: default ( )
1157+ . infer_schema ( local_object_reader_stream ( vec ! [ filename. clone( ) ] ) )
1158+ . await ?;
1159+
1160+ let group_empty = vec ! [ vec![ file_range( filename. clone( ) , 0 , 5 ) ] ] ;
1161+ let group_contain = vec ! [ vec![ file_range( filename. clone( ) , 5 , i64 :: MAX ) ] ] ;
1162+ let group_all = vec ! [ vec![
1163+ file_range( filename. clone( ) , 0 , 5 ) ,
1164+ file_range( filename. clone( ) , 5 , i64 :: MAX ) ,
1165+ ] ] ;
1166+
1167+ assert_parquet_read (
1168+ group_empty,
1169+ None ,
1170+ session_ctx. task_ctx ( ) ,
1171+ file_schema. clone ( ) ,
1172+ )
1173+ . await ?;
1174+ assert_parquet_read (
1175+ group_contain,
1176+ Some ( 8 ) ,
1177+ session_ctx. task_ctx ( ) ,
1178+ file_schema. clone ( ) ,
1179+ )
1180+ . await ?;
1181+ assert_parquet_read ( group_all, Some ( 8 ) , session_ctx. task_ctx ( ) , file_schema)
1182+ . await ?;
1183+
1184+ Ok ( ( ) )
1185+ }
1186+
11021187 #[ tokio:: test]
11031188 async fn parquet_exec_with_partition ( ) -> Result < ( ) > {
11041189 let session_ctx = SessionContext :: new ( ) ;
@@ -1171,6 +1256,7 @@ mod tests {
11711256 last_modified : None ,
11721257 } ,
11731258 partition_values : vec ! [ ] ,
1259+ range : None ,
11741260 } ;
11751261
11761262 let parquet_exec = ParquetExec :: new (
0 commit comments