Skip to content

Commit 0a2c027

Browse files
jonathanc-na, alamb, and jayzhan211
authored
chore: deprecate ValuesExec in favour of MemoryExec (#14032)
* chore: deprecate `ValuesExec` in favour of `MemoryExec` * clippy fix * Update datafusion/physical-plan/src/values.rs Co-authored-by: Andrew Lamb <[email protected]> * change to memoryexec * Update datafusion/physical-plan/src/memory.rs Co-authored-by: Jay Zhan <[email protected]> * use compute properties * clippy fix --------- Co-authored-by: Andrew Lamb <[email protected]> Co-authored-by: Jay Zhan <[email protected]>
1 parent 167c11e commit 0a2c027

File tree

6 files changed

+214
-12
lines changed

6 files changed

+214
-12
lines changed

datafusion/core/src/physical_planner.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ use crate::physical_plan::repartition::RepartitionExec;
5454
use crate::physical_plan::sorts::sort::SortExec;
5555
use crate::physical_plan::union::UnionExec;
5656
use crate::physical_plan::unnest::UnnestExec;
57-
use crate::physical_plan::values::ValuesExec;
5857
use crate::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec};
5958
use crate::physical_plan::{
6059
displayable, windows, ExecutionPlan, ExecutionPlanProperties, InputOrderMode,
@@ -466,7 +465,8 @@ impl DefaultPhysicalPlanner {
466465
.collect::<Result<Vec<Arc<dyn PhysicalExpr>>>>()
467466
})
468467
.collect::<Result<Vec<_>>>()?;
469-
let value_exec = ValuesExec::try_new(SchemaRef::new(exec_schema), exprs)?;
468+
let value_exec =
469+
MemoryExec::try_new_as_values(SchemaRef::new(exec_schema), exprs)?;
470470
Arc::new(value_exec)
471471
}
472472
LogicalPlan::EmptyRelation(EmptyRelation {

datafusion/physical-plan/src/memory.rs

Lines changed: 189 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,17 @@ use std::sync::Arc;
2424
use std::task::{Context, Poll};
2525

2626
use super::{
27-
common, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties,
28-
RecordBatchStream, SendableRecordBatchStream, Statistics,
27+
common, ColumnarValue, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning,
28+
PhysicalExpr, PlanProperties, RecordBatchStream, SendableRecordBatchStream,
29+
Statistics,
2930
};
3031
use crate::execution_plan::{Boundedness, EmissionType};
3132

3233
use arrow::datatypes::SchemaRef;
3334
use arrow::record_batch::RecordBatch;
34-
use datafusion_common::{internal_err, project_schema, Result};
35+
use arrow_array::RecordBatchOptions;
36+
use arrow_schema::Schema;
37+
use datafusion_common::{internal_err, plan_err, project_schema, Result, ScalarValue};
3538
use datafusion_execution::memory_pool::MemoryReservation;
3639
use datafusion_execution::TaskContext;
3740
use datafusion_physical_expr::equivalence::ProjectionMapping;
@@ -174,6 +177,96 @@ impl MemoryExec {
174177
})
175178
}
176179

180+
/// Create a new execution plan from a list of constant values (`ValuesExec`)
181+
pub fn try_new_as_values(
182+
schema: SchemaRef,
183+
data: Vec<Vec<Arc<dyn PhysicalExpr>>>,
184+
) -> Result<Self> {
185+
if data.is_empty() {
186+
return plan_err!("Values list cannot be empty");
187+
}
188+
189+
let n_row = data.len();
190+
let n_col = schema.fields().len();
191+
192+
// We have this single row batch as a placeholder to satisfy evaluation argument
193+
// and generate a single output row
194+
let placeholder_schema = Arc::new(Schema::empty());
195+
let placeholder_batch = RecordBatch::try_new_with_options(
196+
Arc::clone(&placeholder_schema),
197+
vec![],
198+
&RecordBatchOptions::new().with_row_count(Some(1)),
199+
)?;
200+
201+
// Evaluate each column
202+
let arrays = (0..n_col)
203+
.map(|j| {
204+
(0..n_row)
205+
.map(|i| {
206+
let expr = &data[i][j];
207+
let result = expr.evaluate(&placeholder_batch)?;
208+
209+
match result {
210+
ColumnarValue::Scalar(scalar) => Ok(scalar),
211+
ColumnarValue::Array(array) if array.len() == 1 => {
212+
ScalarValue::try_from_array(&array, 0)
213+
}
214+
ColumnarValue::Array(_) => {
215+
plan_err!("Cannot have array values in a values list")
216+
}
217+
}
218+
})
219+
.collect::<Result<Vec<_>>>()
220+
.and_then(ScalarValue::iter_to_array)
221+
})
222+
.collect::<Result<Vec<_>>>()?;
223+
224+
let batch = RecordBatch::try_new_with_options(
225+
Arc::clone(&schema),
226+
arrays,
227+
&RecordBatchOptions::new().with_row_count(Some(n_row)),
228+
)?;
229+
230+
let partitions = vec![batch];
231+
Self::try_new_from_batches(Arc::clone(&schema), partitions)
232+
}
233+
234+
/// Create a new plan using the provided schema and batches.
235+
///
236+
/// Errors if any of the batches don't match the provided schema, or if no
237+
/// batches are provided.
238+
pub fn try_new_from_batches(
239+
schema: SchemaRef,
240+
batches: Vec<RecordBatch>,
241+
) -> Result<Self> {
242+
if batches.is_empty() {
243+
return plan_err!("Values list cannot be empty");
244+
}
245+
246+
for batch in &batches {
247+
let batch_schema = batch.schema();
248+
if batch_schema != schema {
249+
return plan_err!(
250+
"Batch has invalid schema. Expected: {}, got: {}",
251+
schema,
252+
batch_schema
253+
);
254+
}
255+
}
256+
257+
let partitions = vec![batches];
258+
let cache = Self::compute_properties(Arc::clone(&schema), &[], &partitions);
259+
Ok(Self {
260+
partitions,
261+
schema: Arc::clone(&schema),
262+
projected_schema: Arc::clone(&schema),
263+
projection: None,
264+
sort_information: vec![],
265+
cache,
266+
show_sizes: true,
267+
})
268+
}
269+
177270
/// Set `show_sizes` to determine whether to display partition sizes
178271
pub fn with_show_sizes(mut self, show_sizes: bool) -> Self {
179272
self.show_sizes = show_sizes;
@@ -696,3 +789,96 @@ mod lazy_memory_tests {
696789
Ok(())
697790
}
698791
}
792+
793+
#[cfg(test)]
794+
mod tests {
795+
use super::*;
796+
use crate::expressions::lit;
797+
use crate::test::{self, make_partition};
798+
799+
use arrow_schema::{DataType, Field};
800+
use datafusion_common::stats::{ColumnStatistics, Precision};
801+
802+
#[tokio::test]
803+
async fn values_empty_case() -> Result<()> {
804+
let schema = test::aggr_test_schema();
805+
let empty = MemoryExec::try_new_as_values(schema, vec![]);
806+
assert!(empty.is_err());
807+
Ok(())
808+
}
809+
810+
#[test]
811+
fn new_exec_with_batches() {
812+
let batch = make_partition(7);
813+
let schema = batch.schema();
814+
let batches = vec![batch.clone(), batch];
815+
let _exec = MemoryExec::try_new_from_batches(schema, batches).unwrap();
816+
}
817+
818+
#[test]
819+
fn new_exec_with_batches_empty() {
820+
let batch = make_partition(7);
821+
let schema = batch.schema();
822+
let _ = MemoryExec::try_new_from_batches(schema, Vec::new()).unwrap_err();
823+
}
824+
825+
#[test]
826+
fn new_exec_with_batches_invalid_schema() {
827+
let batch = make_partition(7);
828+
let batches = vec![batch.clone(), batch];
829+
830+
let invalid_schema = Arc::new(Schema::new(vec![
831+
Field::new("col0", DataType::UInt32, false),
832+
Field::new("col1", DataType::Utf8, false),
833+
]));
834+
let _ = MemoryExec::try_new_from_batches(invalid_schema, batches).unwrap_err();
835+
}
836+
837+
// Test issue: https://github.com/apache/datafusion/issues/8763
838+
#[test]
839+
fn new_exec_with_non_nullable_schema() {
840+
let schema = Arc::new(Schema::new(vec![Field::new(
841+
"col0",
842+
DataType::UInt32,
843+
false,
844+
)]));
845+
let _ = MemoryExec::try_new_as_values(Arc::clone(&schema), vec![vec![lit(1u32)]])
846+
.unwrap();
847+
// Test that a null value is rejected
848+
let _ = MemoryExec::try_new_as_values(
849+
schema,
850+
vec![vec![lit(ScalarValue::UInt32(None))]],
851+
)
852+
.unwrap_err();
853+
}
854+
855+
#[test]
856+
fn values_stats_with_nulls_only() -> Result<()> {
857+
let data = vec![
858+
vec![lit(ScalarValue::Null)],
859+
vec![lit(ScalarValue::Null)],
860+
vec![lit(ScalarValue::Null)],
861+
];
862+
let rows = data.len();
863+
let values = MemoryExec::try_new_as_values(
864+
Arc::new(Schema::new(vec![Field::new("col0", DataType::Null, true)])),
865+
data,
866+
)?;
867+
868+
assert_eq!(
869+
values.statistics()?,
870+
Statistics {
871+
num_rows: Precision::Exact(rows),
872+
total_byte_size: Precision::Exact(8), // not important
873+
column_statistics: vec![ColumnStatistics {
874+
null_count: Precision::Exact(rows), // there are only nulls
875+
distinct_count: Precision::Absent,
876+
max_value: Precision::Absent,
877+
min_value: Precision::Absent,
878+
},],
879+
}
880+
);
881+
882+
Ok(())
883+
}
884+
}

datafusion/physical-plan/src/values.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ use datafusion_execution::TaskContext;
3434
use datafusion_physical_expr::EquivalenceProperties;
3535

3636
/// Execution plan for values list based relation (produces constant rows)
37+
#[deprecated(since = "45.0.0", note = "Use `MemoryExec::try_new_as_values` instead")]
3738
#[derive(Debug, Clone)]
3839
pub struct ValuesExec {
3940
/// The schema
@@ -44,6 +45,7 @@ pub struct ValuesExec {
4445
cache: PlanProperties,
4546
}
4647

48+
#[allow(deprecated)]
4749
impl ValuesExec {
4850
/// Create a new values exec from data as expr
4951
pub fn try_new(
@@ -117,6 +119,7 @@ impl ValuesExec {
117119
}
118120

119121
let cache = Self::compute_properties(Arc::clone(&schema));
122+
#[allow(deprecated)]
120123
Ok(ValuesExec {
121124
schema,
122125
data: batches,
@@ -126,6 +129,7 @@ impl ValuesExec {
126129

127130
/// Provides the data
128131
pub fn data(&self) -> Vec<RecordBatch> {
132+
#[allow(deprecated)]
129133
self.data.clone()
130134
}
131135

@@ -140,6 +144,7 @@ impl ValuesExec {
140144
}
141145
}
142146

147+
#[allow(deprecated)]
143148
impl DisplayAs for ValuesExec {
144149
fn fmt_as(
145150
&self,
@@ -154,6 +159,7 @@ impl DisplayAs for ValuesExec {
154159
}
155160
}
156161

162+
#[allow(deprecated)]
157163
impl ExecutionPlan for ValuesExec {
158164
fn name(&self) -> &'static str {
159165
"ValuesExec"
@@ -165,6 +171,7 @@ impl ExecutionPlan for ValuesExec {
165171
}
166172

167173
fn properties(&self) -> &PlanProperties {
174+
#[allow(deprecated)]
168175
&self.cache
169176
}
170177

@@ -176,6 +183,7 @@ impl ExecutionPlan for ValuesExec {
176183
self: Arc<Self>,
177184
_: Vec<Arc<dyn ExecutionPlan>>,
178185
) -> Result<Arc<dyn ExecutionPlan>> {
186+
#[allow(deprecated)]
179187
ValuesExec::try_new_from_batches(Arc::clone(&self.schema), self.data.clone())
180188
.map(|e| Arc::new(e) as _)
181189
}
@@ -194,6 +202,7 @@ impl ExecutionPlan for ValuesExec {
194202

195203
Ok(Box::pin(MemoryStream::try_new(
196204
self.data(),
205+
#[allow(deprecated)]
197206
Arc::clone(&self.schema),
198207
None,
199208
)?))
@@ -203,6 +212,7 @@ impl ExecutionPlan for ValuesExec {
203212
let batch = self.data();
204213
Ok(common::compute_record_batch_statistics(
205214
&[batch],
215+
#[allow(deprecated)]
206216
&self.schema,
207217
None,
208218
))
@@ -221,6 +231,7 @@ mod tests {
221231
#[tokio::test]
222232
async fn values_empty_case() -> Result<()> {
223233
let schema = test::aggr_test_schema();
234+
#[allow(deprecated)]
224235
let empty = ValuesExec::try_new(schema, vec![]);
225236
assert!(empty.is_err());
226237
Ok(())
@@ -231,14 +242,15 @@ mod tests {
231242
let batch = make_partition(7);
232243
let schema = batch.schema();
233244
let batches = vec![batch.clone(), batch];
234-
245+
#[allow(deprecated)]
235246
let _exec = ValuesExec::try_new_from_batches(schema, batches).unwrap();
236247
}
237248

238249
#[test]
239250
fn new_exec_with_batches_empty() {
240251
let batch = make_partition(7);
241252
let schema = batch.schema();
253+
#[allow(deprecated)]
242254
let _ = ValuesExec::try_new_from_batches(schema, Vec::new()).unwrap_err();
243255
}
244256

@@ -251,6 +263,7 @@ mod tests {
251263
Field::new("col0", DataType::UInt32, false),
252264
Field::new("col1", DataType::Utf8, false),
253265
]));
266+
#[allow(deprecated)]
254267
let _ = ValuesExec::try_new_from_batches(invalid_schema, batches).unwrap_err();
255268
}
256269

@@ -262,8 +275,10 @@ mod tests {
262275
DataType::UInt32,
263276
false,
264277
)]));
278+
#[allow(deprecated)]
265279
let _ = ValuesExec::try_new(Arc::clone(&schema), vec![vec![lit(1u32)]]).unwrap();
266280
// Test that a null value is rejected
281+
#[allow(deprecated)]
267282
let _ = ValuesExec::try_new(schema, vec![vec![lit(ScalarValue::UInt32(None))]])
268283
.unwrap_err();
269284
}
@@ -276,6 +291,7 @@ mod tests {
276291
vec![lit(ScalarValue::Null)],
277292
];
278293
let rows = data.len();
294+
#[allow(deprecated)]
279295
let values = ValuesExec::try_new(
280296
Arc::new(Schema::new(vec![Field::new("col0", DataType::Null, true)])),
281297
data,

datafusion/sqllogictest/test_files/insert_to_external.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ physical_plan
128128
01)DataSinkExec: sink=CsvSink(file_groups=[])
129129
02)--SortExec: expr=[a@0 ASC NULLS LAST, b@1 DESC], preserve_partitioning=[false]
130130
03)----ProjectionExec: expr=[column1@0 as a, column2@1 as b]
131-
04)------ValuesExec
131+
04)------MemoryExec: partitions=1, partition_sizes=[1]
132132

133133
query I
134134
INSERT INTO ordered_insert_test values (5, 1), (4, 2), (7,7), (7,8), (7,9), (7,10), (3, 3), (2, 4), (1, 5);

datafusion/sqllogictest/test_files/order.slt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -786,15 +786,15 @@ physical_plan
786786
08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
787787
09)----------------AggregateExec: mode=Partial, gby=[t@0 as t], aggr=[]
788788
10)------------------ProjectionExec: expr=[column1@0 as t]
789-
11)--------------------ValuesExec
789+
11)--------------------MemoryExec: partitions=1, partition_sizes=[1]
790790
12)------ProjectionExec: expr=[1 as m, t@0 as t]
791791
13)--------AggregateExec: mode=FinalPartitioned, gby=[t@0 as t], aggr=[]
792792
14)----------CoalesceBatchesExec: target_batch_size=8192
793793
15)------------RepartitionExec: partitioning=Hash([t@0], 2), input_partitions=2
794794
16)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
795795
17)----------------AggregateExec: mode=Partial, gby=[t@0 as t], aggr=[]
796796
18)------------------ProjectionExec: expr=[column1@0 as t]
797-
19)--------------------ValuesExec
797+
19)--------------------MemoryExec: partitions=1, partition_sizes=[1]
798798

799799
#####
800800
# Multi column sorting with lists

0 commit comments

Comments (0)