Skip to content

Commit e560ece

Browse files
committed
Merge remote-tracking branch 'apache/main' into sgrebnov/improve-join-upstream
2 parents 936b076 + 1fd6116 commit e560ece

12 files changed

Lines changed: 201 additions & 53 deletions

File tree

datafusion/core/src/bin/print_functions_docs.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,13 @@ fn print_docs(
195195
);
196196
}
197197

198+
if let Some(alt_syntax) = &documentation.alternative_syntax {
199+
let _ = writeln!(docs, "#### Alternative Syntax\n");
200+
for syntax in alt_syntax {
201+
let _ = writeln!(docs, "```sql\n{}\n```", syntax);
202+
}
203+
}
204+
198205
// next, aliases
199206
if !f.get_aliases().is_empty() {
200207
let _ = writeln!(docs, "#### Aliases");

datafusion/expr/src/logical_plan/builder.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,6 +1482,15 @@ pub fn validate_unique_names<'a>(
14821482
/// [`TypeCoercionRewriter::coerce_union`]: https://docs.rs/datafusion-optimizer/latest/datafusion_optimizer/analyzer/type_coercion/struct.TypeCoercionRewriter.html#method.coerce_union
14831483
/// [`coerce_union_schema`]: https://docs.rs/datafusion-optimizer/latest/datafusion_optimizer/analyzer/type_coercion/fn.coerce_union_schema.html
14841484
pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result<LogicalPlan> {
1485+
if left_plan.schema().fields().len() != right_plan.schema().fields().len() {
1486+
return plan_err!(
1487+
"UNION queries have different number of columns: \
1488+
left has {} columns whereas right has {} columns",
1489+
left_plan.schema().fields().len(),
1490+
right_plan.schema().fields().len()
1491+
);
1492+
}
1493+
14851494
// Temporarily use the schema from the left input and later rely on the analyzer to
14861495
// coerce the two schemas into a common one.
14871496

datafusion/expr/src/udf_docs.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ pub struct Documentation {
4747
/// Left member of a pair is the argument name, right is a
4848
/// description for the argument
4949
pub arguments: Option<Vec<(String, String)>>,
50+
/// A list of alternative syntax examples for a function
51+
pub alternative_syntax: Option<Vec<String>>,
5052
/// Related functions if any. Values should match the related
5153
/// udf's name exactly. Related udf's must be of the same
5254
/// UDF type (scalar, aggregate or window) for proper linking to
@@ -96,6 +98,7 @@ pub struct DocumentationBuilder {
9698
pub syntax_example: Option<String>,
9799
pub sql_example: Option<String>,
98100
pub arguments: Option<Vec<(String, String)>>,
101+
pub alternative_syntax: Option<Vec<String>>,
99102
pub related_udfs: Option<Vec<String>>,
100103
}
101104

@@ -107,6 +110,7 @@ impl DocumentationBuilder {
107110
syntax_example: None,
108111
sql_example: None,
109112
arguments: None,
113+
alternative_syntax: None,
110114
related_udfs: None,
111115
}
112116
}
@@ -172,6 +176,13 @@ impl DocumentationBuilder {
172176
self.with_argument(arg_name, description)
173177
}
174178

179+
pub fn with_alternative_syntax(mut self, syntax_name: impl Into<String>) -> Self {
180+
let mut alternative_syntax_array = self.alternative_syntax.unwrap_or_default();
181+
alternative_syntax_array.push(syntax_name.into());
182+
self.alternative_syntax = Some(alternative_syntax_array);
183+
self
184+
}
185+
175186
pub fn with_related_udf(mut self, related_udf: impl Into<String>) -> Self {
176187
let mut related = self.related_udfs.unwrap_or_default();
177188
related.push(related_udf.into());
@@ -186,6 +197,7 @@ impl DocumentationBuilder {
186197
syntax_example,
187198
sql_example,
188199
arguments,
200+
alternative_syntax,
189201
related_udfs,
190202
} = self;
191203

@@ -205,6 +217,7 @@ impl DocumentationBuilder {
205217
syntax_example: syntax_example.unwrap(),
206218
sql_example,
207219
arguments,
220+
alternative_syntax,
208221
related_udfs,
209222
})
210223
}

datafusion/functions/src/unicode/strpos.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ fn get_strpos_doc() -> &'static Documentation {
9797
```"#)
9898
.with_standard_argument("str", Some("String"))
9999
.with_argument("substr", "Substring expression to search for.")
100+
.with_alternative_syntax("position(substr in origstr)")
100101
.build()
101102
.unwrap()
102103
})

datafusion/sql/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ strum = { version = "0.26.1", features = ["derive"] }
5656
ctor = { workspace = true }
5757
datafusion-functions = { workspace = true, default-features = true }
5858
datafusion-functions-aggregate = { workspace = true }
59+
datafusion-functions-nested = { workspace = true }
5960
datafusion-functions-window = { workspace = true }
6061
env_logger = { workspace = true }
6162
paste = "^1.0"

datafusion/sql/src/unparser/expr.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use datafusion_expr::expr::Unnest;
1819
use sqlparser::ast::Value::SingleQuotedString;
1920
use sqlparser::ast::{
2021
self, BinaryOperator, Expr as AstExpr, Function, Ident, Interval, ObjectName,
@@ -466,7 +467,7 @@ impl Unparser<'_> {
466467
Ok(ast::Expr::Value(ast::Value::Placeholder(p.id.to_string())))
467468
}
468469
Expr::OuterReferenceColumn(_, col) => self.col_to_sql(col),
469-
Expr::Unnest(_) => not_impl_err!("Unsupported Expr conversion: {expr:?}"),
470+
Expr::Unnest(unnest) => self.unnest_to_sql(unnest),
470471
}
471472
}
472473

@@ -1340,6 +1341,29 @@ impl Unparser<'_> {
13401341
}
13411342
}
13421343

1344+
/// Converts an UNNEST operation to an AST expression by wrapping it as a function call,
1345+
/// since there is no direct representation for UNNEST in the AST.
1346+
fn unnest_to_sql(&self, unnest: &Unnest) -> Result<ast::Expr> {
1347+
let args = self.function_args_to_sql(std::slice::from_ref(&unnest.expr))?;
1348+
1349+
Ok(ast::Expr::Function(Function {
1350+
name: ast::ObjectName(vec![Ident {
1351+
value: "UNNEST".to_string(),
1352+
quote_style: None,
1353+
}]),
1354+
args: ast::FunctionArguments::List(ast::FunctionArgumentList {
1355+
duplicate_treatment: None,
1356+
args,
1357+
clauses: vec![],
1358+
}),
1359+
filter: None,
1360+
null_treatment: None,
1361+
over: None,
1362+
within_group: vec![],
1363+
parameters: ast::FunctionArguments::None,
1364+
}))
1365+
}
1366+
13431367
fn arrow_dtype_to_ast_dtype(&self, data_type: &DataType) -> Result<ast::DataType> {
13441368
match data_type {
13451369
DataType::Null => {
@@ -1855,6 +1879,15 @@ mod tests {
18551879
}),
18561880
r#"CAST(a AS DECIMAL(12,0))"#,
18571881
),
1882+
(
1883+
Expr::Unnest(Unnest {
1884+
expr: Box::new(Expr::Column(Column {
1885+
relation: Some(TableReference::partial("schema", "table")),
1886+
name: "array_col".to_string(),
1887+
})),
1888+
}),
1889+
r#"UNNEST("schema"."table".array_col)"#,
1890+
),
18581891
];
18591892

18601893
for (expr, expected) in tests {

datafusion/sql/src/unparser/plan.rs

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ use super::{
2626
subquery_alias_inner_query_and_columns, TableAliasRewriter,
2727
},
2828
utils::{
29-
find_agg_node_within_select, find_window_nodes_within_select,
30-
try_transform_to_simple_table_scan_with_filters, unproject_sort_expr,
31-
unproject_window_exprs,
29+
find_agg_node_within_select, find_unnest_node_within_select,
30+
find_window_nodes_within_select, try_transform_to_simple_table_scan_with_filters,
31+
unproject_sort_expr, unproject_unnest_expr, unproject_window_exprs,
3232
},
3333
Unparser,
3434
};
@@ -174,15 +174,24 @@ impl Unparser<'_> {
174174
p: &Projection,
175175
select: &mut SelectBuilder,
176176
) -> Result<()> {
177+
let mut exprs = p.expr.clone();
178+
179+
// If an Unnest node is found within the select, find and unproject the unnest column
180+
if let Some(unnest) = find_unnest_node_within_select(plan) {
181+
exprs = exprs
182+
.into_iter()
183+
.map(|e| unproject_unnest_expr(e, unnest))
184+
.collect::<Result<Vec<_>>>()?;
185+
};
186+
177187
match (
178188
find_agg_node_within_select(plan, true),
179189
find_window_nodes_within_select(plan, None, true),
180190
) {
181191
(Some(agg), window) => {
182192
let window_option = window.as_deref();
183-
let items = p
184-
.expr
185-
.iter()
193+
let items = exprs
194+
.into_iter()
186195
.map(|proj_expr| {
187196
let unproj = unproject_agg_exprs(proj_expr, agg, window_option)?;
188197
self.select_item_to_sql(&unproj)
@@ -199,9 +208,8 @@ impl Unparser<'_> {
199208
));
200209
}
201210
(None, Some(window)) => {
202-
let items = p
203-
.expr
204-
.iter()
211+
let items = exprs
212+
.into_iter()
205213
.map(|proj_expr| {
206214
let unproj = unproject_window_exprs(proj_expr, &window)?;
207215
self.select_item_to_sql(&unproj)
@@ -211,8 +219,7 @@ impl Unparser<'_> {
211219
select.projection(items);
212220
}
213221
_ => {
214-
let items = p
215-
.expr
222+
let items = exprs
216223
.iter()
217224
.map(|e| self.select_item_to_sql(e))
218225
.collect::<Result<Vec<_>>>()?;
@@ -319,7 +326,8 @@ impl Unparser<'_> {
319326
if let Some(agg) =
320327
find_agg_node_within_select(plan, select.already_projected())
321328
{
322-
let unprojected = unproject_agg_exprs(&filter.predicate, agg, None)?;
329+
let unprojected =
330+
unproject_agg_exprs(filter.predicate.clone(), agg, None)?;
323331
let filter_expr = self.expr_to_sql(&unprojected)?;
324332
select.having(Some(filter_expr));
325333
} else {
@@ -652,6 +660,28 @@ impl Unparser<'_> {
652660
Ok(())
653661
}
654662
LogicalPlan::Extension(_) => not_impl_err!("Unsupported operator: {plan:?}"),
663+
LogicalPlan::Unnest(unnest) => {
664+
if !unnest.struct_type_columns.is_empty() {
665+
return internal_err!(
666+
"Struct type columns are not currently supported in UNNEST: {:?}",
667+
unnest.struct_type_columns
668+
);
669+
}
670+
671+
// In the case of UNNEST, the Unnest node is followed by a duplicate Projection node that we should skip.
672+
// Otherwise, there will be a duplicate SELECT clause.
673+
// | Projection: table.col1, UNNEST(table.col2)
674+
// | Unnest: UNNEST(table.col2)
675+
// | Projection: table.col1, table.col2 AS UNNEST(table.col2)
676+
// | Filter: table.col3 = Int64(3)
677+
// | TableScan: table projection=None
678+
if let LogicalPlan::Projection(p) = unnest.input.as_ref() {
679+
// continue with projection input
680+
self.select_to_sql_recursively(&p.input, query, select, relation)
681+
} else {
682+
internal_err!("Unnest input is not a Projection: {unnest:?}")
683+
}
684+
}
655685
_ => not_impl_err!("Unsupported operator: {plan:?}"),
656686
}
657687
}

0 commit comments

Comments
 (0)