@@ -49,7 +49,7 @@ use log::debug;
4949use parquet:: arrow:: arrow_reader:: { ArrowReaderMetadata , ArrowReaderOptions } ;
5050use parquet:: arrow:: async_reader:: AsyncFileReader ;
5151use parquet:: arrow:: { ParquetRecordBatchStreamBuilder , ProjectionMask } ;
52- use parquet:: file:: metadata:: ParquetMetaDataReader ;
52+ use parquet:: file:: metadata:: { PageIndexPolicy , ParquetMetaDataReader } ;
5353
5454/// Implements [`FileOpener`] for a parquet file
5555pub ( super ) struct ParquetOpener {
@@ -83,6 +83,8 @@ pub(super) struct ParquetOpener {
8383 /// Should the page index be read from parquet files, if present, to skip
8484 /// data pages
8585 pub enable_page_index : bool ,
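86+ /// Should the Parquet reader tolerate missing page indexes? When the page index is loaded, `true` selects `PageIndexPolicy::Optional` and `false` selects `PageIndexPolicy::Required`.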
87+ pub tolerate_missing_page_index : bool ,
8688 /// Should the bloom filter be read from parquet, if present, to skip row
8789 /// groups
8890 pub enable_bloom_filter : bool ,
@@ -149,6 +151,8 @@ impl FileOpener for ParquetOpener {
149151 enable_page_index = false ;
150152 }
151153
154+ let tolerate_missing_page_index = self . tolerate_missing_page_index ;
155+
152156 Ok ( Box :: pin ( async move {
153157 // Prune this file using the file level statistics and partition values.
154158 // Since dynamic filters may have been updated since planning it is possible that we are able
@@ -279,11 +283,16 @@ impl FileOpener for ParquetOpener {
279283 // code above may not have read the page index structures yet. If we
280284 // need them for reading and they aren't yet loaded, we need to load them now.
281285 if should_enable_page_index ( enable_page_index, & page_pruning_predicate) {
286+ let page_index_policy = if tolerate_missing_page_index {
287+ PageIndexPolicy :: Optional
288+ } else {
289+ PageIndexPolicy :: Required
290+ } ;
282291 reader_metadata = load_page_index (
283292 reader_metadata,
284293 & mut async_file_reader,
285- // Since we're manually loading the page index the option here should not matter but we pass it in for consistency
286- options . with_page_index ( true ) ,
294+ options . with_page_index_policy ( page_index_policy ) ,
295+ page_index_policy ,
287296 )
288297 . await ?;
289298 }
@@ -483,6 +492,7 @@ async fn load_page_index<T: AsyncFileReader>(
483492 reader_metadata : ArrowReaderMetadata ,
484493 input : & mut T ,
485494 options : ArrowReaderOptions ,
495+ page_index_policy : PageIndexPolicy ,
486496) -> Result < ArrowReaderMetadata > {
487497 let parquet_metadata = reader_metadata. metadata ( ) ;
488498 let missing_column_index = parquet_metadata. column_index ( ) . is_none ( ) ;
@@ -495,8 +505,9 @@ async fn load_page_index<T: AsyncFileReader>(
495505 if missing_column_index || missing_offset_index {
496506 let m = Arc :: try_unwrap ( Arc :: clone ( parquet_metadata) )
497507 . unwrap_or_else ( |e| e. as_ref ( ) . clone ( ) ) ;
498- let mut reader =
499- ParquetMetaDataReader :: new_with_metadata ( m) . with_page_indexes ( true ) ;
508+ let mut reader = ParquetMetaDataReader :: new_with_metadata ( m)
509+ . with_page_index_policy ( page_index_policy) ;
510+
500511 reader. load_page_index ( input) . await ?;
501512 let new_parquet_metadata = reader. finish ( ) ?;
502513 let new_arrow_reader =
@@ -651,6 +662,7 @@ mod test {
651662
652663 let make_opener = |predicate| {
653664 ParquetOpener {
665+ tolerate_missing_page_index : false ,
654666 partition_index : 0 ,
655667 projection : Arc :: new ( [ 0 , 1 ] ) ,
656668 batch_size : 1024 ,
@@ -733,6 +745,7 @@ mod test {
733745
734746 let make_opener = |predicate| {
735747 ParquetOpener {
748+ tolerate_missing_page_index : false ,
736749 partition_index : 0 ,
737750 projection : Arc :: new ( [ 0 ] ) ,
738751 batch_size : 1024 ,
@@ -835,6 +848,7 @@ mod test {
835848 ] ) ) ;
836849 let make_opener = |predicate| {
837850 ParquetOpener {
851+ tolerate_missing_page_index : false ,
838852 partition_index : 0 ,
839853 projection : Arc :: new ( [ 0 ] ) ,
840854 batch_size : 1024 ,
@@ -947,6 +961,7 @@ mod test {
947961
948962 let make_opener = |predicate| {
949963 ParquetOpener {
964+ tolerate_missing_page_index : false ,
950965 partition_index : 0 ,
951966 projection : Arc :: new ( [ 0 ] ) ,
952967 batch_size : 1024 ,
@@ -1060,6 +1075,7 @@ mod test {
10601075
10611076 let make_opener = |predicate| {
10621077 ParquetOpener {
1078+ tolerate_missing_page_index : false ,
10631079 partition_index : 0 ,
10641080 projection : Arc :: new ( [ 0 ] ) ,
10651081 batch_size : 1024 ,
@@ -1244,6 +1260,7 @@ mod test {
12441260 } ;
12451261
12461262 let make_opener = |predicate| ParquetOpener {
1263+ tolerate_missing_page_index : false ,
12471264 partition_index : 0 ,
12481265 projection : Arc :: new ( [ 0 , 1 ] ) ,
12491266 batch_size : 1024 ,
0 commit comments