@@ -39,9 +39,8 @@ use async_trait::async_trait;
3939use chrono:: { DateTime , TimeZone , Utc } ;
4040use datafusion:: catalog:: { Session , TableProviderFactory } ;
4141use datafusion:: config:: TableParquetOptions ;
42- use datafusion:: datasource:: physical_plan:: parquet:: ParquetExecBuilder ;
4342use datafusion:: datasource:: physical_plan:: {
44- wrap_partition_type_in_dict, wrap_partition_value_in_dict, FileScanConfig ,
43+ wrap_partition_type_in_dict, wrap_partition_value_in_dict, FileScanConfig , ParquetSource ,
4544} ;
4645use datafusion:: datasource:: { listing:: PartitionedFile , MemTable , TableProvider , TableType } ;
4746use datafusion:: execution:: context:: { SessionConfig , SessionContext , SessionState , TaskContext } ;
@@ -648,36 +647,39 @@ impl<'a> DeltaScanBuilder<'a> {
648647 ..Default :: default ( )
649648 } ;
650649
651- let mut exec_plan_builder = ParquetExecBuilder :: new (
652- FileScanConfig :: new ( self . log_store . object_store_url ( ) , file_schema)
653- . with_file_groups (
654- // If all files were filtered out, we still need to emit at least one partition to
655- // pass datafusion sanity checks.
656- //
657- // See https://github.com/apache/datafusion/issues/11322
658- if file_groups. is_empty ( ) {
659- vec ! [ vec![ ] ]
660- } else {
661- file_groups. into_values ( ) . collect ( )
662- } ,
663- )
664- . with_statistics ( stats)
665- . with_projection ( self . projection . cloned ( ) )
666- . with_limit ( self . limit )
667- . with_table_partition_cols ( table_partition_cols) ,
668- )
669- . with_schema_adapter_factory ( Arc :: new ( DeltaSchemaAdapterFactory { } ) )
670- . with_table_parquet_options ( parquet_options) ;
650+ let mut file_source = ParquetSource :: new ( parquet_options)
651+ . with_schema_adapter_factory ( Arc :: new ( DeltaSchemaAdapterFactory { } ) ) ;
671652
672653 // Sometimes (e.g. Merge) we want to prune files that don't match the
673654 // filter and read the entire contents of files that do match the
674655 // filter
675656 if let Some ( predicate) = logical_filter {
676657 if config. enable_parquet_pushdown {
677- exec_plan_builder = exec_plan_builder . with_predicate ( predicate) ;
658+ file_source = file_source . with_predicate ( Arc :: clone ( & file_schema ) , predicate) ;
678659 }
679660 } ;
680661
662+ let file_scan_config = FileScanConfig :: new (
663+ self . log_store . object_store_url ( ) ,
664+ file_schema,
665+ Arc :: new ( file_source) ,
666+ )
667+ . with_file_groups (
668+ // If all files were filtered out, we still need to emit at least one partition to
669+ // pass datafusion sanity checks.
670+ //
671+ // See https://github.com/apache/datafusion/issues/11322
672+ if file_groups. is_empty ( ) {
673+ vec ! [ vec![ ] ]
674+ } else {
675+ file_groups. into_values ( ) . collect ( )
676+ } ,
677+ )
678+ . with_statistics ( stats)
679+ . with_projection ( self . projection . cloned ( ) )
680+ . with_limit ( self . limit )
681+ . with_table_partition_cols ( table_partition_cols) ;
682+
681683 let metrics = ExecutionPlanMetricsSet :: new ( ) ;
682684 MetricBuilder :: new ( & metrics)
683685 . global_counter ( "files_scanned" )
@@ -688,7 +690,7 @@ impl<'a> DeltaScanBuilder<'a> {
688690
689691 Ok ( DeltaScan {
690692 table_uri : ensure_table_uri ( self . log_store . root_uri ( ) ) ?. as_str ( ) . into ( ) ,
691- parquet_scan : exec_plan_builder . build_arc ( ) ,
693+ parquet_scan : file_scan_config . build ( ) ,
692694 config,
693695 logical_schema,
694696 metrics,
@@ -1960,7 +1962,7 @@ mod tests {
19601962 use bytes:: Bytes ;
19611963 use chrono:: { TimeZone , Utc } ;
19621964 use datafusion:: assert_batches_sorted_eq;
1963- use datafusion:: datasource:: physical_plan :: ParquetExec ;
1965+ use datafusion:: datasource:: source :: DataSourceExec ;
19641966 use datafusion:: physical_plan:: empty:: EmptyExec ;
19651967 use datafusion:: physical_plan:: { visit_execution_plan, ExecutionPlanVisitor , PhysicalExpr } ;
19661968 use datafusion_expr:: lit;
@@ -2713,7 +2715,7 @@ mod tests {
27132715 . await
27142716 . unwrap ( ) ;
27152717
2716- let mut visitor = ParquetPredicateVisitor :: default ( ) ;
2718+ let mut visitor = ParquetVisitor :: default ( ) ;
27172719 visit_execution_plan ( & scan, & mut visitor) . unwrap ( ) ;
27182720
27192721 assert_eq ! ( visitor. predicate. unwrap( ) . to_string( ) , "a@0 = s" ) ;
@@ -2748,7 +2750,7 @@ mod tests {
27482750 . await
27492751 . unwrap ( ) ;
27502752
2751- let mut visitor = ParquetPredicateVisitor :: default ( ) ;
2753+ let mut visitor = ParquetVisitor :: default ( ) ;
27522754 visit_execution_plan ( & scan, & mut visitor) . unwrap ( ) ;
27532755
27542756 assert ! ( visitor. predicate. is_none( ) ) ;
@@ -2777,42 +2779,46 @@ mod tests {
27772779 . await
27782780 . unwrap ( ) ;
27792781
2780- let mut visitor = ParquetOptionsVisitor :: default ( ) ;
2782+ let mut visitor = ParquetVisitor :: default ( ) ;
27812783 visit_execution_plan ( & scan, & mut visitor) . unwrap ( ) ;
27822784
27832785 assert_eq ! ( ctx. copied_table_options( ) . parquet, visitor. options. unwrap( ) ) ;
27842786 }
27852787
2788+ /// Extracts fields from the parquet scan
27862789 #[ derive( Default ) ]
2787- struct ParquetPredicateVisitor {
2790+ struct ParquetVisitor {
27882791 predicate : Option < Arc < dyn PhysicalExpr > > ,
27892792 pruning_predicate : Option < Arc < PruningPredicate > > ,
2793+ options : Option < TableParquetOptions > ,
27902794 }
27912795
2792- impl ExecutionPlanVisitor for ParquetPredicateVisitor {
2796+ impl ExecutionPlanVisitor for ParquetVisitor {
27932797 type Error = DataFusionError ;
27942798
27952799 fn pre_visit ( & mut self , plan : & dyn ExecutionPlan ) -> Result < bool , Self :: Error > {
2796- if let Some ( parquet_exec) = plan. as_any ( ) . downcast_ref :: < ParquetExec > ( ) {
2797- self . predicate = parquet_exec. predicate ( ) . cloned ( ) ;
2798- self . pruning_predicate = parquet_exec. pruning_predicate ( ) . cloned ( ) ;
2799- }
2800- Ok ( true )
2801- }
2802- }
2803-
2804- #[ derive( Default ) ]
2805- struct ParquetOptionsVisitor {
2806- options : Option < TableParquetOptions > ,
2807- }
2800+ let Some ( datasource_exec) = plan. as_any ( ) . downcast_ref :: < DataSourceExec > ( ) else {
2801+ return Ok ( true ) ;
2802+ } ;
28082803
2809- impl ExecutionPlanVisitor for ParquetOptionsVisitor {
2810- type Error = DataFusionError ;
2804+ let Some ( scan_config) = datasource_exec
2805+ . data_source ( )
2806+ . as_any ( )
2807+ . downcast_ref :: < FileScanConfig > ( )
2808+ else {
2809+ return Ok ( true ) ;
2810+ } ;
28112811
2812- fn pre_visit ( & mut self , plan : & dyn ExecutionPlan ) -> Result < bool , Self :: Error > {
2813- if let Some ( parquet_exec) = plan. as_any ( ) . downcast_ref :: < ParquetExec > ( ) {
2814- self . options = Some ( parquet_exec. table_parquet_options ( ) . clone ( ) )
2812+ if let Some ( parquet_source) = scan_config
2813+ . file_source
2814+ . as_any ( )
2815+ . downcast_ref :: < ParquetSource > ( )
2816+ {
2817+ self . options = Some ( parquet_source. table_parquet_options ( ) . clone ( ) ) ;
2818+ self . predicate = parquet_source. predicate ( ) . cloned ( ) ;
2819+ self . pruning_predicate = parquet_source. pruning_predicate ( ) . cloned ( ) ;
28152820 }
2821+
28162822 Ok ( true )
28172823 }
28182824 }
0 commit comments