Skip to content

Commit 0dbdd94

Browse files
committed
[DataFrame] Read files in parallel
1 parent 413eba1 commit 0dbdd94

File tree

2 files changed

+27
-8
lines changed

2 files changed

+27
-8
lines changed

datafusion/core/src/execution/context.rs

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ use datafusion_common::alias::AliasGenerator;
3232
use datafusion_execution::registry::SerializerRegistry;
3333
use datafusion_expr::{
3434
logical_plan::{DdlStatement, Statement},
35-
DescribeTable, StringifiedPlan, UserDefinedLogicalNode, WindowUDF,
35+
DescribeTable, Partitioning, StringifiedPlan, UserDefinedLogicalNode, WindowUDF,
3636
};
3737
pub use datafusion_physical_expr::execution_props::ExecutionProps;
3838
use datafusion_physical_expr::var_provider::is_system_variables;
@@ -917,11 +917,16 @@ impl SessionContext {
917917
/// Creates a [`DataFrame`] for a [`TableProvider`] such as a
918918
/// [`ListingTable`] or a custom user defined provider.
919919
pub fn read_table(&self, provider: Arc<dyn TableProvider>) -> Result<DataFrame> {
920-
Ok(DataFrame::new(
921-
self.state(),
922-
LogicalPlanBuilder::scan(UNNAMED_TABLE, provider_as_source(provider), None)?
923-
.build()?,
924-
))
920+
let state = self.state();
921+
let mut builder =
922+
LogicalPlanBuilder::scan(UNNAMED_TABLE, provider_as_source(provider), None)?;
923+
let target_partitions = state.config.target_partitions();
924+
if target_partitions > 1 {
925+
// Keep the data in the target number of partitions
926+
builder =
927+
builder.repartition(Partitioning::RoundRobinBatch(target_partitions))?;
928+
}
929+
Ok(DataFrame::new(state, builder.build()?))
925930
}
926931

927932
/// Creates a [`DataFrame`] for reading a [`RecordBatch`]

datafusion/core/tests/dataframe/mod.rs

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -685,7 +685,8 @@ async fn test_grouping_sets() -> Result<()> {
685685

686686
#[tokio::test]
687687
async fn test_grouping_sets_count() -> Result<()> {
688-
let ctx = SessionContext::new();
688+
let config = SessionConfig::new().with_target_partitions(1);
689+
let ctx = SessionContext::with_config(config);
689690

690691
let grouping_set_expr = Expr::GroupingSet(GroupingSet::GroupingSets(vec![
691692
vec![col("c1")],
@@ -725,7 +726,8 @@ async fn test_grouping_sets_count() -> Result<()> {
725726

726727
#[tokio::test]
727728
async fn test_grouping_set_array_agg_with_overflow() -> Result<()> {
728-
let ctx = SessionContext::new();
729+
let config = SessionConfig::new().with_target_partitions(1);
730+
let ctx = SessionContext::with_config(config);
729731

730732
let grouping_set_expr = Expr::GroupingSet(GroupingSet::GroupingSets(vec![
731733
vec![col("c1")],
@@ -795,6 +797,18 @@ async fn test_grouping_set_array_agg_with_overflow() -> Result<()> {
795797
Ok(())
796798
}
797799

800+
#[tokio::test]
801+
async fn test_read_partitioned() -> Result<()> {
802+
let config = SessionConfig::new().with_target_partitions(4);
803+
let ctx = SessionContext::with_config(config);
804+
805+
let df = aggregates_table(&ctx).await?;
806+
let plan = df.create_physical_plan().await?;
807+
808+
assert_eq!(plan.output_partitioning().partition_count(), 4);
809+
Ok(())
810+
}
811+
798812
#[tokio::test]
799813
async fn join_with_alias_filter() -> Result<()> {
800814
let join_ctx = create_join_context()?;

0 commit comments

Comments
 (0)