Skip to content

Commit ef521d3

Browse files
LuciferYang authored and Max Gekk committed
[SPARK-39354][SQL] Ensure show Table or view not found even if there are dataTypeMismatchError related to Filter at the same time
### What changes were proposed in this pull request? After SPARK-38118, `dataTypeMismatchError` related to `Filter` will be checked and throw in `RemoveTempResolvedColumn`, this will cause compatibility issue with exception message presentation. For example, the following case: ``` spark.sql("create table t1(user_id int, auct_end_dt date) using parquet;") spark.sql("select * from t1 join t2 on t1.user_id = t2.user_id where t1.auct_end_dt >= Date_sub('2020-12-27', 90)").show ``` The expected message is ``` Table or view not found: t2 ``` But the actual message is ``` org.apache.spark.sql.AnalysisException: cannot resolve 'date_sub('2020-12-27', 90)' due to data type mismatch: argument 1 requires date type, however, ''2020-12-27'' is of string type.; line 1 pos 76 ``` For forward compatibility, this pr change to only records `DATA_TYPE_MISMATCH_ERROR_MESSAGE` in the `RemoveTempResolvedColumn` check process , and move `failAnalysis` to `CheckAnalysis#checkAnalysis` ### Why are the changes needed? Fix analysis exception message compatibility. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions and add a new test case Closes #36746 from LuciferYang/SPARK-39354. Authored-by: yangjie01 <yangjie01@baidu.com> Signed-off-by: Max Gekk <max.gekk@gmail.com> (cherry picked from commit 89fdb8a) Signed-off-by: Max Gekk <max.gekk@gmail.com>
1 parent fef5695 commit ef521d3

3 files changed

Lines changed: 32 additions & 8 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ import scala.util.{Failure, Random, Success, Try}
2828

2929
import org.apache.spark.sql.AnalysisException
3030
import org.apache.spark.sql.catalyst._
31-
import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR}
31+
import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.DATA_TYPE_MISMATCH_ERROR_MESSAGE
3232
import org.apache.spark.sql.catalyst.catalog._
3333
import org.apache.spark.sql.catalyst.encoders.OuterScopes
3434
import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _}
@@ -4328,10 +4328,7 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] {
43284328
case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure =>
43294329
e.checkInputDataTypes() match {
43304330
case TypeCheckResult.TypeCheckFailure(message) =>
4331-
e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true)
4332-
e.failAnalysis(
4333-
s"cannot resolve '${e.sql}' due to data type mismatch: $message" +
4334-
extraHintForAnsiTypeCoercionExpression(plan))
4331+
e.setTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE, message)
43354332
}
43364333
case _ =>
43374334
})

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
5050

5151
val DATA_TYPE_MISMATCH_ERROR = TreeNodeTag[Boolean]("dataTypeMismatchError")
5252

53+
val DATA_TYPE_MISMATCH_ERROR_MESSAGE = TreeNodeTag[String]("dataTypeMismatchError")
54+
5355
protected def failAnalysis(msg: String): Nothing = {
5456
throw new AnalysisException(msg)
5557
}
@@ -174,7 +176,20 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
174176
}
175177
}
176178

177-
getAllExpressions(operator).foreach(_.foreachUp {
179+
val expressions = getAllExpressions(operator)
180+
181+
expressions.foreach(_.foreachUp {
182+
case e: Expression =>
183+
e.getTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE) match {
184+
case Some(message) =>
185+
e.failAnalysis(s"cannot resolve '${e.sql}' due to data type mismatch: $message" +
186+
extraHintForAnsiTypeCoercionExpression(operator))
187+
case _ =>
188+
}
189+
case _ =>
190+
})
191+
192+
expressions.foreach(_.foreachUp {
178193
case a: Attribute if !a.resolved =>
179194
val missingCol = a.sql
180195
val candidates = operator.inputSet.toSeq.map(_.qualifiedName)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1170,13 +1170,25 @@ class AnalysisSuite extends AnalysisTest with Matchers {
11701170
|WITH t as (SELECT true c, false d)
11711171
|SELECT (t.c AND t.d) c
11721172
|FROM t
1173-
|GROUP BY t.c
1173+
|GROUP BY t.c, t.d
11741174
|HAVING ${func}(c) > 0d""".stripMargin),
1175-
Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"),
1175+
Seq(s"cannot resolve '$func(c)' due to data type mismatch"),
11761176
false)
11771177
}
11781178
}
11791179

1180+
test("SPARK-39354: should be `Table or view not found`") {
1181+
assertAnalysisError(parsePlan(
1182+
s"""
1183+
|WITH t1 as (SELECT 1 user_id, CAST("2022-06-02" AS DATE) dt)
1184+
|SELECT *
1185+
|FROM t1
1186+
|JOIN t2 ON t1.user_id = t2.user_id
1187+
|WHERE t1.dt >= DATE_SUB('2020-12-27', 90)""".stripMargin),
1188+
Seq(s"Table or view not found: t2"),
1189+
false)
1190+
}
1191+
11801192
test("SPARK-39144: nested subquery expressions deduplicate relations should be done bottom up") {
11811193
val innerRelation = SubqueryAlias("src1", testRelation)
11821194
val outerRelation = SubqueryAlias("src2", testRelation)

0 commit comments

Comments (0)