
Commit 9a8190a

alamb and Jefffrey authored
Update sqlparser to 29.0.0 (#4770)
* Sql planner support for rollup/cube/grouping sets ast nodes
* Trigger build
* Update sqlparser to 29.0.0
* Update for api changes
* Update one test
* sqlparser update
* fmt + clippy
* reduce stack overhead

Co-authored-by: Jefffrey <[email protected]>
1 parent acb8846 commit 9a8190a

8 files changed

Lines changed: 92 additions & 30 deletions


datafusion-cli/Cargo.lock

Lines changed: 2 additions & 2 deletions
Generated file; diff not rendered by default.

datafusion/common/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -46,4 +46,4 @@ cranelift-module = { version = "0.89.0", optional = true }
 object_store = { version = "0.5.0", default-features = false, optional = true }
 parquet = { version = "29.0.0", default-features = false, optional = true }
 pyo3 = { version = "0.17.1", optional = true }
-sqlparser = "0.28"
+sqlparser = "0.29"

datafusion/core/Cargo.toml

Lines changed: 1 addition & 2 deletions
@@ -90,7 +90,7 @@ pyo3 = { version = "0.17.1", optional = true }
 rand = "0.8"
 rayon = { version = "1.5", optional = true }
 smallvec = { version = "1.6", features = ["union"] }
-sqlparser = "0.28"
+sqlparser = "0.29"
 tempfile = "3"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
 tokio-stream = "0.1"
@@ -111,7 +111,6 @@ env_logger = "0.10"
 parquet-test-utils = { path = "../../parquet-test-utils" }
 rstest = "0.16.0"
 sqllogictest = "0.10.0"
-sqlparser = "0.28"
 test-utils = { path = "../../test-utils" }
 thiserror = "1.0.37"

datafusion/core/tests/sql/explain_analyze.rs

Lines changed: 1 addition & 2 deletions
@@ -898,8 +898,7 @@ async fn explain_nested() {
         .set_bool(OPT_EXPLAIN_PHYSICAL_PLAN_ONLY, explain_phy_plan_flag);
     let ctx = SessionContext::with_config(config);
     let sql = "EXPLAIN explain select 1";
-    let dataframe = ctx.sql(sql).await.unwrap();
-    let err = dataframe.create_physical_plan().await.unwrap_err();
+    let err = ctx.sql(sql).await.unwrap_err();
     assert!(err.to_string().contains("Explain must be root of the plan"));
 }

datafusion/expr/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -39,4 +39,4 @@ ahash = { version = "0.8", default-features = false, features = ["runtime-rng"]
 arrow = { version = "29.0.0", default-features = false }
 datafusion-common = { path = "../common", version = "15.0.0" }
 log = "^0.4"
-sqlparser = "0.28"
+sqlparser = "0.29"

datafusion/sql/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -41,4 +41,4 @@ arrow-schema = "29.0.0"
 datafusion-common = { path = "../common", version = "15.0.0" }
 datafusion-expr = { path = "../expr", version = "15.0.0" }
 log = "^0.4"
-sqlparser = "0.28"
+sqlparser = "0.29"

datafusion/sql/src/parser.rs

Lines changed: 16 additions & 10 deletions
@@ -25,7 +25,7 @@ use sqlparser::{
     },
     dialect::{keywords::Keyword, Dialect, GenericDialect},
     parser::{Parser, ParserError},
-    tokenizer::{Token, Tokenizer},
+    tokenizer::{Token, TokenWithLocation, Tokenizer},
 };
 use std::{collections::HashMap, str::FromStr};
 use std::{collections::VecDeque, fmt};
@@ -124,7 +124,7 @@ impl<'a> DFParser<'a> {
         let tokens = tokenizer.tokenize()?;
 
         Ok(DFParser {
-            parser: Parser::new(tokens, dialect),
+            parser: Parser::new(dialect).with_tokens(tokens),
         })
     }
 
@@ -165,13 +165,17 @@ impl<'a> DFParser<'a> {
     }
 
     /// Report an unexpected token
-    fn expected<T>(&self, expected: &str, found: Token) -> Result<T, ParserError> {
+    fn expected<T>(
+        &self,
+        expected: &str,
+        found: TokenWithLocation,
+    ) -> Result<T, ParserError> {
         parser_err!(format!("Expected {expected}, found: {found}"))
     }
 
     /// Parse a new expression
     pub fn parse_statement(&mut self) -> Result<Statement, ParserError> {
-        match self.parser.peek_token() {
+        match self.parser.peek_token().token {
             Token::Word(w) => {
                 match w.keyword {
                     Keyword::CREATE => {
@@ -227,7 +231,7 @@ impl<'a> DFParser<'a> {
         }
 
         loop {
-            if let Token::Word(_) = self.parser.peek_token() {
+            if let Token::Word(_) = self.parser.peek_token().token {
                 let identifier = self.parser.parse_identifier()?;
                 partitions.push(identifier.to_string());
             } else {
@@ -262,7 +266,7 @@ impl<'a> DFParser<'a> {
         loop {
             if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
                 constraints.push(constraint);
-            } else if let Token::Word(_) = self.parser.peek_token() {
+            } else if let Token::Word(_) = self.parser.peek_token().token {
                 let column_def = self.parse_column_def()?;
                 columns.push(column_def);
             } else {
@@ -379,19 +383,21 @@ impl<'a> DFParser<'a> {
 
     /// Parses the set of valid formats
     fn parse_file_format(&mut self) -> Result<String, ParserError> {
-        match self.parser.next_token() {
+        let token = self.parser.next_token();
+        match &token.token {
             Token::Word(w) => parse_file_type(&w.value),
-            unexpected => self.expected("one of PARQUET, NDJSON, or CSV", unexpected),
+            _ => self.expected("one of PARQUET, NDJSON, or CSV", token),
         }
     }
 
     /// Parses the set of
     fn parse_file_compression_type(
        &mut self,
     ) -> Result<CompressionTypeVariant, ParserError> {
-        match self.parser.next_token() {
+        let token = self.parser.next_token();
+        match &token.token {
             Token::Word(w) => CompressionTypeVariant::from_str(&w.value),
-            unexpected => self.expected("one of GZIP, BZIP2, XZ", unexpected),
+            _ => self.expected("one of GZIP, BZIP2, XZ", token),
         }
     }
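The parser.rs changes track two sqlparser 0.29 API changes: a Parser is now built from a dialect and given its tokens via with_tokens, and peek_token()/next_token() return a TokenWithLocation whose token field carries the plain Token. The snippet below is a minimal standalone sketch of that 0.29 usage pattern, not code from this commit; the query string and printed output are illustrative only.

// Minimal sketch (not part of this commit) of the sqlparser 0.29 API shape
// that DFParser adapts to above. Assumes only the sqlparser crate.
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;
use sqlparser::tokenizer::{Token, Tokenizer};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let dialect = GenericDialect {};
    let sql = "SELECT a, b FROM t GROUP BY ROLLUP (a, b)"; // illustrative query

    // 0.29 style: tokenize first, then build the Parser from a dialect and
    // attach the tokens, instead of the old Parser::new(tokens, dialect).
    let mut tokenizer = Tokenizer::new(&dialect, sql);
    let tokens = tokenizer.tokenize()?;
    let mut parser = Parser::new(&dialect).with_tokens(tokens);

    // peek_token()/next_token() now return TokenWithLocation; the plain
    // Token lives in its `token` field.
    if let Token::Word(w) = parser.peek_token().token {
        println!("first keyword: {}", w.value);
    }

    let statement = parser.parse_statement()?;
    println!("{statement}");
    Ok(())
}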

datafusion/sql/src/planner.rs

Lines changed: 69 additions & 11 deletions
@@ -2235,15 +2235,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             normalize_ident(function.name.0[0].clone())
         };
 
-        // first, check SQL reserved words
-        if name == "rollup" {
-            let args = self.function_args_to_expr(function.args, schema)?;
-            return Ok(Expr::GroupingSet(GroupingSet::Rollup(args)));
-        } else if name == "cube" {
-            let args = self.function_args_to_expr(function.args, schema)?;
-            return Ok(Expr::GroupingSet(GroupingSet::Cube(args)));
-        }
-
         // next, scalar built-in
         if let Ok(fun) = BuiltinScalarFunction::from_str(&name) {
             let args = self.function_args_to_expr(function.args, schema)?;
@@ -2347,6 +2338,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 }
             }
 
+            SQLExpr::Rollup(exprs) => self.sql_rollup_to_expr(exprs, schema, planner_context),
+            SQLExpr::Cube(exprs) => self.sql_cube_to_expr(exprs, schema, planner_context),
+            SQLExpr::GroupingSets(exprs) => self.sql_grouping_sets_to_expr(exprs, schema, planner_context),
+
             SQLExpr::Floor { expr, field: _field } => {
                 let fun = BuiltinScalarFunction::Floor;
                 let args = vec![self.sql_expr_to_logical_expr(*expr, schema, planner_context)?];
@@ -2387,6 +2382,67 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         })
     }
 
+    fn sql_rollup_to_expr(
+        &self,
+        exprs: Vec<Vec<SQLExpr>>,
+        schema: &DFSchema,
+        planner_context: &mut PlannerContext,
+    ) -> Result<Expr> {
+        let args: Result<Vec<_>> = exprs
+            .into_iter()
+            .map(|v| {
+                if v.len() != 1 {
+                    Err(DataFusionError::Internal(
+                        "Tuple expressions are not supported for Rollup expressions"
+                            .to_string(),
+                    ))
+                } else {
+                    self.sql_expr_to_logical_expr(v[0].clone(), schema, planner_context)
+                }
+            })
+            .collect();
+        Ok(Expr::GroupingSet(GroupingSet::Rollup(args?)))
+    }
+
+    fn sql_cube_to_expr(
+        &self,
+        exprs: Vec<Vec<SQLExpr>>,
+        schema: &DFSchema,
+        planner_context: &mut PlannerContext,
+    ) -> Result<Expr> {
+        let args: Result<Vec<_>> = exprs
+            .into_iter()
+            .map(|v| {
+                if v.len() != 1 {
+                    Err(DataFusionError::Internal(
+                        "Tuple expressions not are supported for Cube expressions"
+                            .to_string(),
+                    ))
+                } else {
+                    self.sql_expr_to_logical_expr(v[0].clone(), schema, planner_context)
+                }
+            })
+            .collect();
+        Ok(Expr::GroupingSet(GroupingSet::Cube(args?)))
+    }
+
+    fn sql_grouping_sets_to_expr(
+        &self,
+        exprs: Vec<Vec<SQLExpr>>,
+        schema: &DFSchema,
+        planner_context: &mut PlannerContext,
+    ) -> Result<Expr> {
+        let args: Result<Vec<Vec<_>>> = exprs
+            .into_iter()
+            .map(|v| {
+                v.into_iter()
+                    .map(|e| self.sql_expr_to_logical_expr(e, schema, planner_context))
+                    .collect()
+            })
+            .collect();
+        Ok(Expr::GroupingSet(GroupingSet::GroupingSets(args?)))
+    }
+
     fn parse_exists_subquery(
         &self,
         subquery: Query,
@@ -2634,6 +2690,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             SQLExpr::Identifier(i) => i.to_string(),
             SQLExpr::Value(v) => match v {
                 Value::SingleQuotedString(s) => s.to_string(),
+                Value::DollarQuotedString(s) => s.to_string(),
                 Value::Number(_, _) | Value::Boolean(_) => v.to_string(),
                 Value::DoubleQuotedString(_)
                 | Value::UnQuotedString(_)
@@ -5664,11 +5721,12 @@ mod tests {
         quick_test(sql, expected);
     }
 
-    #[ignore] // see https://github.com/apache/arrow-datafusion/issues/2469
     #[test]
     fn aggregate_with_grouping_sets() {
         let sql = "SELECT id, state, age, COUNT(*) FROM person GROUP BY id, GROUPING SETS ((state), (state, age), (id, state))";
-        let expected = "TBD";
+        let expected = "Projection: person.id, person.state, person.age, COUNT(UInt8(1))\
+        \n  Aggregate: groupBy=[[person.id, GROUPING SETS ((person.state), (person.state, person.age), (person.id, person.state))]], aggr=[[COUNT(UInt8(1))]]\
+        \n  TableScan: person";
         quick_test(sql, expected);
     }
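With the planner now handling SQLExpr::Rollup, SQLExpr::Cube, and SQLExpr::GroupingSets directly (and the aggregate_with_grouping_sets test un-ignored), GROUPING SETS queries plan end to end. Below is a hedged usage sketch against a DataFusion SessionContext of roughly this release line, not code from this commit; the person table, person.csv path, and its columns are assumed for illustration.

// Hedged usage sketch (not part of this commit): running a GROUPING SETS
// query through DataFusion's SQL planner. The CSV file and its columns
// (id, state, age) are assumed for illustration.
use datafusion::error::Result;
use datafusion::prelude::{CsvReadOptions, SessionContext};

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    ctx.register_csv("person", "person.csv", CsvReadOptions::new())
        .await?;

    // ROLLUP, CUBE, and GROUPING SETS in GROUP BY are now planned via the
    // dedicated SQLExpr AST nodes rather than by matching function names.
    let df = ctx
        .sql(
            "SELECT id, state, age, COUNT(*) FROM person \
             GROUP BY id, GROUPING SETS ((state), (state, age), (id, state))",
        )
        .await?;
    df.show().await?;
    Ok(())
}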
