Skip to content

Commit 25fd4a1

Browse files
alambion-elgreco
authored and committed
chore: use builder API to create FileScanConfig
Signed-off-by: Andrew Lamb <[email protected]>
1 parent c1bbc4c commit 25fd4a1

File tree

2 files changed

+34
-56
lines changed

2 files changed

+34
-56
lines changed

crates/core/src/delta_datafusion/mod.rs

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ use datafusion::physical_optimizer::pruning::PruningPredicate;
5151
use datafusion_common::scalar::ScalarValue;
5252
use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor};
5353
use datafusion_common::{
54-
config::ConfigOptions, Column, Constraints, DFSchema, DataFusionError,
55-
Result as DataFusionResult, TableReference, ToDFSchema,
54+
config::ConfigOptions, Column, DFSchema, DataFusionError, Result as DataFusionResult,
55+
TableReference, ToDFSchema,
5656
};
5757
use datafusion_expr::execution_props::ExecutionProps;
5858
use datafusion_expr::logical_plan::CreateExternalTable;
@@ -648,25 +648,24 @@ impl<'a> DeltaScanBuilder<'a> {
648648
..Default::default()
649649
};
650650

651-
let mut exec_plan_builder = ParquetExecBuilder::new(FileScanConfig {
652-
object_store_url: self.log_store.object_store_url(),
653-
file_schema,
654-
// If all files were filtered out, we still need to emit at least one partition to
655-
// pass datafusion sanity checks.
656-
//
657-
// See https://github.com/apache/datafusion/issues/11322
658-
file_groups: if file_groups.is_empty() {
659-
vec![vec![]]
660-
} else {
661-
file_groups.into_values().collect()
662-
},
663-
constraints: Constraints::default(),
664-
statistics: stats,
665-
projection: self.projection.cloned(),
666-
limit: self.limit,
667-
table_partition_cols,
668-
output_ordering: vec![],
669-
})
651+
let mut exec_plan_builder = ParquetExecBuilder::new(
652+
FileScanConfig::new(self.log_store.object_store_url(), file_schema)
653+
.with_file_groups(
654+
// If all files were filtered out, we still need to emit at least one partition to
655+
// pass datafusion sanity checks.
656+
//
657+
// See https://github.com/apache/datafusion/issues/11322
658+
if file_groups.is_empty() {
659+
vec![vec![]]
660+
} else {
661+
file_groups.into_values().collect()
662+
},
663+
)
664+
.with_statistics(stats)
665+
.with_projection(self.projection.cloned())
666+
.with_limit(self.limit)
667+
.with_table_partition_cols(table_partition_cols),
668+
)
670669
.with_schema_adapter_factory(Arc::new(DeltaSchemaAdapterFactory {}))
671670
.with_table_parquet_options(parquet_options);
672671

crates/core/src/operations/load_cdf.rs

Lines changed: 14 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use datafusion::datasource::file_format::FileFormat;
2121
use datafusion::datasource::physical_plan::FileScanConfig;
2222
use datafusion::execution::SessionState;
2323
use datafusion::prelude::SessionContext;
24-
use datafusion_common::{Constraints, ScalarValue, Statistics};
24+
use datafusion_common::ScalarValue;
2525
use datafusion_physical_expr::{expressions, PhysicalExpr};
2626
use datafusion_physical_plan::projection::ProjectionExec;
2727
use datafusion_physical_plan::union::UnionExec;
@@ -377,53 +377,32 @@ impl CdfLoadBuilder {
377377
let cdc_scan = ParquetFormat::new()
378378
.create_physical_plan(
379379
session_sate,
380-
FileScanConfig {
381-
object_store_url: self.log_store.object_store_url(),
382-
file_schema: cdc_file_schema.clone(),
383-
file_groups: cdc_file_groups.into_values().collect(),
384-
constraints: Constraints::default(),
385-
statistics: Statistics::new_unknown(&cdc_file_schema),
386-
projection: None,
387-
limit: None,
388-
table_partition_cols: cdc_partition_cols,
389-
output_ordering: vec![],
390-
},
380+
FileScanConfig::new(self.log_store.object_store_url(), cdc_file_schema)
381+
.with_file_groups(cdc_file_groups.into_values().collect())
382+
.with_table_partition_cols(cdc_partition_cols),
391383
filters,
392384
)
393385
.await?;
394386

395387
let add_scan = ParquetFormat::new()
396388
.create_physical_plan(
397389
session_sate,
398-
FileScanConfig {
399-
object_store_url: self.log_store.object_store_url(),
400-
file_schema: add_remove_file_schema.clone(),
401-
file_groups: add_file_groups.into_values().collect(),
402-
constraints: Constraints::default(),
403-
statistics: Statistics::new_unknown(&add_remove_file_schema.clone()),
404-
projection: None,
405-
limit: None,
406-
table_partition_cols: add_remove_partition_cols.clone(),
407-
output_ordering: vec![],
408-
},
390+
FileScanConfig::new(
391+
self.log_store.object_store_url(),
392+
add_remove_file_schema.clone(),
393+
)
394+
.with_file_groups(add_file_groups.into_values().collect())
395+
.with_table_partition_cols(add_remove_partition_cols.clone()),
409396
filters,
410397
)
411398
.await?;
412399

413400
let remove_scan = ParquetFormat::new()
414401
.create_physical_plan(
415402
session_sate,
416-
FileScanConfig {
417-
object_store_url: self.log_store.object_store_url(),
418-
file_schema: add_remove_file_schema.clone(),
419-
file_groups: remove_file_groups.into_values().collect(),
420-
constraints: Constraints::default(),
421-
statistics: Statistics::new_unknown(&add_remove_file_schema),
422-
projection: None,
423-
limit: None,
424-
table_partition_cols: add_remove_partition_cols,
425-
output_ordering: vec![],
426-
},
403+
FileScanConfig::new(self.log_store.object_store_url(), add_remove_file_schema)
404+
.with_file_groups(remove_file_groups.into_values().collect())
405+
.with_table_partition_cols(add_remove_partition_cols),
427406
filters,
428407
)
429408
.await?;
@@ -434,7 +413,7 @@ impl CdfLoadBuilder {
434413
Arc::new(UnionExec::new(vec![cdc_scan, add_scan, remove_scan]));
435414

436415
// We project the union in the order of the input_schema + cdc cols at the end
437-
// This is to ensure the DeltaCdfTableProvider uses the correct schema consturction.
416+
// This is to ensure the DeltaCdfTableProvider uses the correct schema construction.
438417
let mut fields = schema.fields().to_vec();
439418
for f in ADD_PARTITION_SCHEMA.clone() {
440419
fields.push(f.into());

0 commit comments

Comments (0)