//! This module provides a builder for creating LogicalPlans
20- use std:: {
21- collections:: { HashMap , HashSet } ,
22- sync:: Arc ,
23- } ;
24-
25- use arrow:: {
26- datatypes:: { Schema , SchemaRef } ,
27- record_batch:: RecordBatch ,
28- } ;
29-
3020use crate :: datasource:: {
3121 empty:: EmptyTable ,
3222 file_format:: parquet:: { ParquetFormat , DEFAULT_PARQUET_EXTENSION } ,
@@ -37,6 +27,16 @@ use crate::datasource::{
3727use crate :: error:: { DataFusionError , Result } ;
3828use crate :: logical_plan:: plan:: ToStringifiedPlan ;
3929use crate :: prelude:: * ;
30+ use crate :: scalar:: ScalarValue ;
31+ use arrow:: {
32+ datatypes:: { DataType , Schema , SchemaRef } ,
33+ record_batch:: RecordBatch ,
34+ } ;
35+ use std:: convert:: TryFrom ;
36+ use std:: {
37+ collections:: { HashMap , HashSet } ,
38+ sync:: Arc ,
39+ } ;
4040
4141use super :: dfschema:: ToDFSchema ;
4242use super :: { exprlist_to_fields, Expr , JoinConstraint , JoinType , LogicalPlan , PlanType } ;
@@ -111,6 +111,80 @@ impl LogicalPlanBuilder {
111111 } )
112112 }
113113
114+ /// Create a values list based relation, and the schema is inferred from data. This will consume
115+ /// and mut the given value vec.
116+ ///
117+ /// By default, it assigns the names column1, column2, etc. to the columns of a VALUES table.
118+ /// The column names are not specified by the SQL standard and different database systems do it differently,
119+ /// so it's usually better to override the default names with a table alias list.
120+ pub fn values ( mut values : Vec < Vec < Expr > > ) -> Result < Self > {
121+ if values. is_empty ( ) {
122+ return Err ( DataFusionError :: Plan ( "Values list cannot be empty" . into ( ) ) ) ;
123+ }
124+ let n_cols = values[ 0 ] . len ( ) ;
125+ if n_cols == 0 {
126+ return Err ( DataFusionError :: Plan (
127+ "Values list cannot be zero length" . into ( ) ,
128+ ) ) ;
129+ }
130+ let empty_schema = DFSchema :: empty ( ) ;
131+ let mut field_types: Vec < Option < DataType > > = Vec :: with_capacity ( n_cols) ;
132+ for _ in 0 ..n_cols {
133+ field_types. push ( None ) ;
134+ }
135+ // hold all the null holes so that we can correct their data types later
136+ let mut nulls: Vec < ( usize , usize ) > = Vec :: new ( ) ;
137+ for ( i, row) in values. iter ( ) . enumerate ( ) {
138+ if row. len ( ) != n_cols {
139+ return Err ( DataFusionError :: Plan ( format ! (
140+ "Inconsistent data length across values list: got {} values in row {} but expected {}" ,
141+ row. len( ) ,
142+ i,
143+ n_cols
144+ ) ) ) ;
145+ }
146+ field_types = row
147+ . iter ( )
148+ . enumerate ( )
149+ . map ( |( j, expr) | {
150+ if let Expr :: Literal ( ScalarValue :: Utf8 ( None ) ) = expr {
151+ nulls. push ( ( i, j) ) ;
152+ Ok ( field_types[ j] . clone ( ) )
153+ } else {
154+ let data_type = expr. get_type ( & empty_schema) ?;
155+ if let Some ( prev_data_type) = & field_types[ j] {
156+ if prev_data_type != & data_type {
157+ return Err ( DataFusionError :: Plan ( format ! (
158+ "Inconsistent data type across values list at column {}" ,
159+ i
160+ ) ) ) ;
161+ }
162+ }
163+ Ok ( Some ( data_type) )
164+ }
165+ } )
166+ . collect :: < Result < Vec < Option < DataType > > > > ( ) ?;
167+ }
168+ let fields = field_types
169+ . iter ( )
170+ . enumerate ( )
171+ . map ( |( j, data_type) | {
172+ DFField :: new (
173+ None ,
174+ // naming is following convention https://www.postgresql.org/docs/current/queries-values.html
175+ & format ! ( "column{}" , j + 1 ) ,
176+ data_type. clone ( ) . unwrap_or ( DataType :: Utf8 ) ,
177+ true ,
178+ )
179+ } )
180+ . collect :: < Vec < _ > > ( ) ;
181+ for ( i, j) in nulls {
182+ values[ i] [ j] = Expr :: Literal ( ScalarValue :: try_from ( fields[ j] . data_type ( ) ) ?) ;
183+ }
184+ let schema = DFSchemaRef :: new ( DFSchema :: new ( fields) ?) ;
185+ Ok ( Self :: from ( LogicalPlan :: Values { schema, values } ) )
186+ }
187+
114188 /// Scan a memory data source
115189 pub fn scan_memory (
116190 partitions : Vec < Vec < RecordBatch > > ,
0 commit comments