Skip to content

Commit be1b282

Browse files
LantaoJincloud-fan
authored andcommitted
[SPARK-32237][SQL][3.0] Resolve hint in CTE
### What changes were proposed in this pull request? The backport of #29062 This PR is to move `Substitution` rule before `Hints` rule in `Analyzer` to avoid hint in CTE not working. ### Why are the changes needed? Below SQL in Spark3.0 will throw AnalysisException, but it works in Spark2.x ```sql WITH cte AS (SELECT /*+ REPARTITION(3) */ T.id, T.data FROM $t1 T) SELECT cte.id, cte.data FROM cte ``` ``` Failed to analyze query: org.apache.spark.sql.AnalysisException: cannot resolve '`cte.id`' given input columns: [cte.data, cte.id]; line 3 pos 7; 'Project ['cte.id, 'cte.data] +- SubqueryAlias cte +- Project [id#21L, data#22] +- SubqueryAlias T +- SubqueryAlias testcat.ns1.ns2.tbl +- RelationV2[id#21L, data#22] testcat.ns1.ns2.tbl 'Project ['cte.id, 'cte.data] +- SubqueryAlias cte +- Project [id#21L, data#22] +- SubqueryAlias T +- SubqueryAlias testcat.ns1.ns2.tbl +- RelationV2[id#21L, data#22] testcat.ns1.ns2.tbl ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add a unit test Closes #29201 from LantaoJin/SPARK-32237_branch-3.0. Lead-authored-by: LantaoJin <jinlantao@gmail.com> Co-authored-by: Alan Jin <jinlantao@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent ebac47b commit be1b282

3 files changed

Lines changed: 41 additions & 6 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -200,16 +200,16 @@ class Analyzer(
200200
val postHocResolutionRules: Seq[Rule[LogicalPlan]] = Nil
201201

202202
lazy val batches: Seq[Batch] = Seq(
203-
Batch("Hints", fixedPoint,
204-
new ResolveHints.ResolveJoinStrategyHints(conf),
205-
new ResolveHints.ResolveCoalesceHints(conf)),
206-
Batch("Simple Sanity Check", Once,
207-
LookupFunctions),
208203
Batch("Substitution", fixedPoint,
209204
CTESubstitution,
210205
WindowsSubstitution,
211206
EliminateUnions,
212207
new SubstituteUnresolvedOrdinals(conf)),
208+
Batch("Hints", fixedPoint,
209+
new ResolveHints.ResolveJoinStrategyHints(conf),
210+
new ResolveHints.ResolveCoalesceHints(conf)),
211+
Batch("Simple Sanity Check", Once,
212+
LookupFunctions),
213213
Batch("Resolution", fixedPoint,
214214
ResolveTableValuedFunctions ::
215215
ResolveNamespace(catalogManager) ::

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import org.apache.log4j.Level
2525
import org.scalatest.Matchers
2626

2727
import org.apache.spark.api.python.PythonEvalType
28-
import org.apache.spark.sql.catalyst.TableIdentifier
28+
import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
2929
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog}
3030
import org.apache.spark.sql.catalyst.dsl.expressions._
3131
import org.apache.spark.sql.catalyst.dsl.plans._
@@ -879,4 +879,27 @@ class AnalysisSuite extends AnalysisTest with Matchers {
879879
Seq("Intersect can only be performed on tables with the compatible column types. " +
880880
"timestamp <> double at the second column of the second table"))
881881
}
882+
883+
test("SPARK-32237: Hint in CTE") {
884+
val plan = With(
885+
Project(
886+
Seq(UnresolvedAttribute("cte.a")),
887+
UnresolvedRelation(TableIdentifier("cte"))
888+
),
889+
Seq(
890+
(
891+
"cte",
892+
SubqueryAlias(
893+
AliasIdentifier("cte"),
894+
UnresolvedHint(
895+
"REPARTITION",
896+
Seq(Literal(3)),
897+
Project(testRelation.output, testRelation)
898+
)
899+
)
900+
)
901+
)
902+
)
903+
assertAnalysisSuccess(plan)
904+
}
882905
}

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3468,6 +3468,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
34683468
}
34693469
}
34703470

3471+
test("SPARK-32237: Hint in CTE") {
3472+
withTable("t") {
3473+
sql("CREATE TABLE t USING PARQUET AS SELECT 1 AS id")
3474+
checkAnswer(
3475+
sql("""
3476+
|WITH cte AS (SELECT /*+ REPARTITION(3) */ * FROM t)
3477+
|SELECT * FROM cte
3478+
""".stripMargin),
3479+
Row(1) :: Nil)
3480+
}
3481+
}
3482+
34713483
test("SPARK-32372: ResolveReferences.dedupRight should only rewrite attributes for ancestor " +
34723484
"plans of the conflict plan") {
34733485
sql("SELECT name, avg(age) as avg_age FROM person GROUP BY name")

0 commit comments

Comments
 (0)