Skip to content

Commit a9a1319

Browse files
nuno-fariaalamb
andauthored
Enable setting default values for target_partitions and planning_concurrency (apache#15712)
* Enable setting default values for target_partitions and planning_concurrency * Fix doc test * Use transform to apply the mapping from 0 to the default parallelism --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent bdd52a7 commit a9a1319

3 files changed

Lines changed: 53 additions & 5 deletions

File tree

datafusion/common/src/config.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ config_namespace! {
300300
/// concurrency.
301301
///
302302
/// Defaults to the number of CPU cores on the system
303-
pub target_partitions: usize, default = get_available_parallelism()
303+
pub target_partitions: usize, transform = ExecutionOptions::normalized_parallelism, default = get_available_parallelism()
304304

305305
/// The default time zone
306306
///
@@ -316,7 +316,7 @@ config_namespace! {
316316
/// This is mostly use to plan `UNION` children in parallel.
317317
///
318318
/// Defaults to the number of CPU cores on the system
319-
pub planning_concurrency: usize, default = get_available_parallelism()
319+
pub planning_concurrency: usize, transform = ExecutionOptions::normalized_parallelism, default = get_available_parallelism()
320320

321321
/// When set to true, skips verifying that the schema produced by
322322
/// planning the input of `LogicalPlan::Aggregate` exactly matches the
@@ -739,6 +739,19 @@ config_namespace! {
739739
}
740740
}
741741

742+
impl ExecutionOptions {
743+
/// Returns the correct parallelism based on the provided `value`.
744+
/// If `value` is `"0"`, returns the default available parallelism, computed with
745+
/// `get_available_parallelism`. Otherwise, returns `value`.
746+
fn normalized_parallelism(value: &str) -> String {
747+
if value.parse::<usize>() == Ok(0) {
748+
get_available_parallelism().to_string()
749+
} else {
750+
value.to_owned()
751+
}
752+
}
753+
}
754+
742755
/// A key value pair, with a corresponding description
743756
#[derive(Debug)]
744757
pub struct ConfigEntry {

datafusion/execution/src/config.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,11 @@ impl SessionConfig {
193193
///
194194
/// [`target_partitions`]: datafusion_common::config::ExecutionOptions::target_partitions
195195
pub fn with_target_partitions(mut self, n: usize) -> Self {
196-
// partition count must be greater than zero
197-
assert!(n > 0);
198-
self.options.execution.target_partitions = n;
196+
self.options.execution.target_partitions = if n == 0 {
197+
datafusion_common::config::ExecutionOptions::default().target_partitions
198+
} else {
199+
n
200+
};
199201
self
200202
}
201203

datafusion/sqllogictest/test_files/information_schema.slt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,39 @@ drop table t
149149
statement ok
150150
drop table t2
151151

152+
153+
############
154+
## 0 to represent the default value (target_partitions and planning_concurrency)
155+
###########
156+
157+
statement ok
158+
SET datafusion.execution.target_partitions = 3;
159+
160+
statement ok
161+
SET datafusion.execution.planning_concurrency = 3;
162+
163+
# when setting target_partitions and planning_concurrency to 3, their values will be 3
164+
query TB rowsort
165+
SELECT name, value = 3 FROM information_schema.df_settings WHERE name IN ('datafusion.execution.target_partitions', 'datafusion.execution.planning_concurrency');
166+
----
167+
datafusion.execution.planning_concurrency true
168+
datafusion.execution.target_partitions true
169+
170+
statement ok
171+
SET datafusion.execution.target_partitions = 0;
172+
173+
statement ok
174+
SET datafusion.execution.planning_concurrency = 0;
175+
176+
# when setting target_partitions and planning_concurrency to 0, their values will be equal to the
177+
# default values, which are different from 0 (which is invalid)
178+
query TB rowsort
179+
SELECT name, value = 0 FROM information_schema.df_settings WHERE name IN ('datafusion.execution.target_partitions', 'datafusion.execution.planning_concurrency');
180+
----
181+
datafusion.execution.planning_concurrency false
182+
datafusion.execution.target_partitions false
183+
184+
152185
############
153186
## SHOW VARIABLES should work
154187
###########

0 commit comments

Comments
 (0)