-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-20854][SQL] Extend hint syntax to support expressions #18086
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
03a4281
72cf1d1
2c96a8d
fa11b0b
21ad3aa
84c0746
dff75c8
5439468
d386cdf
6e40301
14a6150
394d644
1e7f95d
8daa05e
09635a9
3290970
7776ae6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -407,7 +407,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging | |
| val withWindow = withDistinct.optionalMap(windows)(withWindows) | ||
|
|
||
| // Hint | ||
| withWindow.optionalMap(hint)(withHints) | ||
| hints.asScala.foldRight(withWindow)(withHints) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why we construct the hint from right to left?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so that |
||
| } | ||
| } | ||
|
|
||
|
|
@@ -533,13 +533,16 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging | |
| } | ||
|
|
||
| /** | ||
| * Add a [[UnresolvedHint]] to a logical plan. | ||
| * Add [[UnresolvedHint]]s to a logical plan. | ||
| */ | ||
| private def withHints( | ||
| ctx: HintContext, | ||
| query: LogicalPlan): LogicalPlan = withOrigin(ctx) { | ||
| val stmt = ctx.hintStatement | ||
| UnresolvedHint(stmt.hintName.getText, stmt.parameters.asScala.map(_.getText), query) | ||
| var plan = query | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. using
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Honestly I think foldLeft is almost always a bad idea ...
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I used foldRight somewhere too. Why is it a bad idea?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i always find a loop simpler to reason about ... |
||
| ctx.hintStatements.asScala.reverse.foreach { case stmt => | ||
| plan = UnresolvedHint(stmt.hintName.getText, stmt.parameters.asScala.map(expression), plan) | ||
| } | ||
| plan | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql | ||
|
|
||
| import org.apache.spark.sql.catalyst.analysis.AnalysisTest | ||
| import org.apache.spark.sql.catalyst.dsl.expressions._ | ||
| import org.apache.spark.sql.catalyst.dsl.plans._ | ||
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
|
|
||
| class DSLHintSuite extends AnalysisTest { | ||
| lazy val a = 'a.int | ||
| lazy val b = 'b.string | ||
| lazy val c = 'c.string | ||
| lazy val r1 = LocalRelation(a, b, c) | ||
|
|
||
| test("various hint parameters") { | ||
| comparePlans( | ||
| r1.hint("hint1"), | ||
| UnresolvedHint("hint1", Seq(), r1) | ||
| ) | ||
|
|
||
| comparePlans( | ||
| r1.hint("hint1", 1, "a"), | ||
| UnresolvedHint("hint1", Seq(1, "a"), r1) | ||
| ) | ||
|
|
||
| comparePlans( | ||
| r1.hint("hint1", 1, $"a"), | ||
| UnresolvedHint("hint1", Seq(1, $"a"), r1) | ||
| ) | ||
|
|
||
| comparePlans( | ||
| r1.hint("hint1", Seq(1, 2, 3), Seq($"a", $"b", $"c")), | ||
| UnresolvedHint("hint1", Seq(Seq(1, 2, 3), Seq($"a", $"b", $"c")), r1) | ||
| ) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,7 @@ | |
| package org.apache.spark.sql.catalyst.parser | ||
|
|
||
| import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} | ||
| import org.apache.spark.sql.catalyst.analysis.{UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedTableValuedFunction} | ||
| import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedTableValuedFunction} | ||
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.plans._ | ||
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
|
|
@@ -527,47 +527,117 @@ class PlanParserSuite extends PlanTest { | |
| val m = intercept[ParseException] { | ||
| parsePlan("SELECT /*+ HINT() */ * FROM t") | ||
| }.getMessage | ||
| assert(m.contains("no viable alternative at input")) | ||
|
|
||
| // Hive compatibility: No database. | ||
| val m2 = intercept[ParseException] { | ||
| parsePlan("SELECT /*+ MAPJOIN(default.t) */ * from default.t") | ||
| }.getMessage | ||
| assert(m2.contains("mismatched input '.' expecting {')', ','}")) | ||
| assert(m.contains("mismatched input")) | ||
|
|
||
| // Disallow space as the delimiter. | ||
| val m3 = intercept[ParseException] { | ||
| parsePlan("SELECT /*+ INDEX(a b c) */ * from default.t") | ||
| }.getMessage | ||
| assert(m3.contains("mismatched input 'b' expecting {')', ','}")) | ||
| assert(m3.contains("mismatched input 'b' expecting")) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ HINT */ * FROM t"), | ||
| UnresolvedHint("HINT", Seq.empty, table("t").select(star()))) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ BROADCASTJOIN(u) */ * FROM t"), | ||
| UnresolvedHint("BROADCASTJOIN", Seq("u"), table("t").select(star()))) | ||
| UnresolvedHint("BROADCASTJOIN", Seq($"u"), table("t").select(star()))) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ MAPJOIN(u) */ * FROM t"), | ||
| UnresolvedHint("MAPJOIN", Seq("u"), table("t").select(star()))) | ||
| UnresolvedHint("MAPJOIN", Seq($"u"), table("t").select(star()))) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ STREAMTABLE(a,b,c) */ * FROM t"), | ||
| UnresolvedHint("STREAMTABLE", Seq("a", "b", "c"), table("t").select(star()))) | ||
| UnresolvedHint("STREAMTABLE", Seq($"a", $"b", $"c"), table("t").select(star()))) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ INDEX(t, emp_job_ix) */ * FROM t"), | ||
| UnresolvedHint("INDEX", Seq("t", "emp_job_ix"), table("t").select(star()))) | ||
| UnresolvedHint("INDEX", Seq($"t", $"emp_job_ix"), table("t").select(star()))) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ MAPJOIN(`default.t`) */ * from `default.t`"), | ||
| UnresolvedHint("MAPJOIN", Seq("default.t"), table("default.t").select(star()))) | ||
| UnresolvedHint("MAPJOIN", Seq(UnresolvedAttribute.quoted("default.t")), | ||
| table("default.t").select(star()))) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ MAPJOIN(t) */ a from t where true group by a order by a"), | ||
| UnresolvedHint("MAPJOIN", Seq("t"), | ||
| UnresolvedHint("MAPJOIN", Seq($"t"), | ||
| table("t").where(Literal(true)).groupBy('a)('a)).orderBy('a.asc)) | ||
| } | ||
|
|
||
| test("SPARK-20854: select hint syntax with expressions") { | ||
| comparePlans( | ||
| parsePlan("SELECT /*+ HINT1(a, array(1, 2, 3)) */ * from t"), | ||
| UnresolvedHint("HINT1", Seq($"a", | ||
| UnresolvedFunction("array", Literal(1) :: Literal(2) :: Literal(3) :: Nil, false)), | ||
| table("t").select(star()) | ||
| ) | ||
| ) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ HINT1(a, array(1, 2, 3)) */ * from t"), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this test case redundant?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yea, @bogdanrdc can you send a follow-up PR to clean it up? |
||
| UnresolvedHint("HINT1", Seq($"a", | ||
| UnresolvedFunction("array", Literal(1) :: Literal(2) :: Literal(3) :: Nil, false)), | ||
| table("t").select(star()) | ||
| ) | ||
| ) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ HINT1(a, 5, 'a', b) */ * from t"), | ||
| UnresolvedHint("HINT1", Seq($"a", Literal(5), Literal("a"), $"b"), | ||
| table("t").select(star()) | ||
| ) | ||
| ) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ HINT1('a', (b, c), (1, 2)) */ * from t"), | ||
| UnresolvedHint("HINT1", | ||
| Seq(Literal("a"), | ||
| CreateStruct($"b" :: $"c" :: Nil), | ||
| CreateStruct(Literal(1) :: Literal(2) :: Nil)), | ||
| table("t").select(star()) | ||
| ) | ||
| ) | ||
| } | ||
|
|
||
| test("SPARK-20854: multiple hints") { | ||
| comparePlans( | ||
| parsePlan("SELECT /*+ HINT1(a, 1) hint2(b, 2) */ * from t"), | ||
| UnresolvedHint("HINT1", Seq($"a", Literal(1)), | ||
| UnresolvedHint("hint2", Seq($"b", Literal(2)), | ||
| table("t").select(star()) | ||
| ) | ||
| ) | ||
| ) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ HINT1(a, 1),hint2(b, 2) */ * from t"), | ||
| UnresolvedHint("HINT1", Seq($"a", Literal(1)), | ||
| UnresolvedHint("hint2", Seq($"b", Literal(2)), | ||
| table("t").select(star()) | ||
| ) | ||
| ) | ||
| ) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ HINT1(a, 1) */ /*+ hint2(b, 2) */ * from t"), | ||
| UnresolvedHint("HINT1", Seq($"a", Literal(1)), | ||
| UnresolvedHint("hint2", Seq($"b", Literal(2)), | ||
| table("t").select(star()) | ||
| ) | ||
| ) | ||
| ) | ||
|
|
||
| comparePlans( | ||
| parsePlan("SELECT /*+ HINT1(a, 1), hint2(b, 2) */ /*+ hint3(c, 3) */ * from t"), | ||
| UnresolvedHint("HINT1", Seq($"a", Literal(1)), | ||
| UnresolvedHint("hint2", Seq($"b", Literal(2)), | ||
| UnresolvedHint("hint3", Seq($"c", Literal(3)), | ||
| table("t").select(star()) | ||
| ) | ||
| ) | ||
| ) | ||
| ) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Normally, we move such a test suite to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added a new test for dsl. I also want a test that calls df.hint |
||
|
|
||
| import org.apache.spark.sql.catalyst.plans.PlanTest | ||
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
| import org.apache.spark.sql.test.SharedSQLContext | ||
|
|
||
| class DataFrameHintSuite extends PlanTest with SharedSQLContext { | ||
| import testImplicits._ | ||
| lazy val df = spark.range(10) | ||
|
|
||
| private def check(df: Dataset[_], expected: LogicalPlan) = { | ||
| comparePlans( | ||
| df.queryExecution.logical, | ||
| expected | ||
| ) | ||
| } | ||
|
|
||
| test("various hint parameters") { | ||
| check( | ||
| df.hint("hint1"), | ||
| UnresolvedHint("hint1", Seq(), | ||
| df.logicalPlan | ||
| ) | ||
| ) | ||
|
|
||
| check( | ||
| df.hint("hint1", 1, "a"), | ||
| UnresolvedHint("hint1", Seq(1, "a"), df.logicalPlan) | ||
| ) | ||
|
|
||
| check( | ||
| df.hint("hint1", 1, $"a"), | ||
| UnresolvedHint("hint1", Seq(1, $"a"), | ||
| df.logicalPlan | ||
| ) | ||
| ) | ||
|
|
||
| check( | ||
| df.hint("hint1", Seq(1, 2, 3), Seq($"a", $"b", $"c")), | ||
| UnresolvedHint("hint1", Seq(Seq(1, 2, 3), Seq($"a", $"b", $"c")), | ||
| df.logicalPlan | ||
| ) | ||
| ) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In Hive and Oracle, multiple hints are put in the same
/*+ */.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This patch supports both.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@gatorsmile does hive support multiple
/*+ */?Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nope. Hive does not support multiple
/*+ */There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It does not hurt anything if we support more hint styles, as long as they are user-friendly.