diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index d2be15d87d023..c8078e10fd044 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -22,21 +22,21 @@ import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, With} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.LEGACY_CTE_PRECEDENCE_ENABLED +import org.apache.spark.sql.internal.SQLConf.{LEGACY_CTE_PRECEDENCE_POLICY, LegacyBehaviorPolicy} /** * Analyze WITH nodes and substitute child plan with CTE definitions. */ object CTESubstitution extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { - val isLegacy = SQLConf.get.getConf(LEGACY_CTE_PRECEDENCE_ENABLED) - if (isLegacy.isEmpty) { - assertNoNameConflictsInCTE(plan, inTraverse = false) - traverseAndSubstituteCTE(plan, inTraverse = false) - } else if (isLegacy.get) { - legacyTraverseAndSubstituteCTE(plan) - } else { - traverseAndSubstituteCTE(plan, inTraverse = false) + LegacyBehaviorPolicy.withName(SQLConf.get.getConf(LEGACY_CTE_PRECEDENCE_POLICY)) match { + case LegacyBehaviorPolicy.EXCEPTION => + assertNoNameConflictsInCTE(plan, inTraverse = false) + traverseAndSubstituteCTE(plan, inTraverse = false) + case LegacyBehaviorPolicy.LEGACY => + legacyTraverseAndSubstituteCTE(plan) + case LegacyBehaviorPolicy.CORRECTED => + traverseAndSubstituteCTE(plan, inTraverse = false) } } @@ -54,8 +54,9 @@ object CTESubstitution extends Rule[LogicalPlan] { case (cteName, _) => if (cteNames.contains(cteName)) { throw new AnalysisException(s"Name $cteName is ambiguous in nested CTE. " + - s"Please set ${LEGACY_CTE_PRECEDENCE_ENABLED.key} to false so that name defined " + - "in inner CTE takes precedence. See more details in SPARK-28228.") + s"Please set ${LEGACY_CTE_PRECEDENCE_POLICY.key} to CORRECTED so that name " + + "defined in inner CTE takes precedence. If set it to LEGACY, outer CTE " + + "definitions will take precedence. See more details in SPARK-28228.") } else { cteName } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 2214e03f34f0b..c13b28254e400 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2106,13 +2106,20 @@ object SQLConf { .booleanConf .createWithDefault(false) - val LEGACY_CTE_PRECEDENCE_ENABLED = buildConf("spark.sql.legacy.ctePrecedence.enabled") + object LegacyBehaviorPolicy extends Enumeration { + val EXCEPTION, LEGACY, CORRECTED = Value + } + + val LEGACY_CTE_PRECEDENCE_POLICY = buildConf("spark.sql.legacy.ctePrecedencePolicy") .internal() - .doc("When true, outer CTE definitions takes precedence over inner definitions. If set to " + - "false, inner CTE definitions take precedence. The default value is empty, " + - "AnalysisException is thrown while name conflict is detected in nested CTE.") - .booleanConf - .createOptional + .doc("When LEGACY, outer CTE definitions takes precedence over inner definitions. If set to " + + "CORRECTED, inner CTE definitions take precedence. The default value is EXCEPTION, " + + "AnalysisException is thrown while name conflict is detected in nested CTE. This config " + + "will be removed in future versions and CORRECTED will be the only behavior.") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) + .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) val LEGACY_ARRAY_EXISTS_FOLLOWS_THREE_VALUED_LOGIC = buildConf("spark.sql.legacy.arrayExistsFollowsThreeValuedLogic") diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql index 2f2606d44d910..d8754d3023965 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql @@ -2,7 +2,7 @@ create temporary view t as select * from values 0, 1, 2 as t(id); create temporary view t2 as select * from values 0, 1 as t(id); -- CTE legacy substitution -SET spark.sql.legacy.ctePrecedence.enabled=true; +SET spark.sql.legacy.ctePrecedencePolicy=legacy; -- CTE in CTE definition WITH t as ( diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql index b711bf338ab08..8b9ce8911b133 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql @@ -1,2 +1,2 @@ ---SET spark.sql.legacy.ctePrecedence.enabled = false +--SET spark.sql.legacy.ctePrecedencePolicy = corrected --IMPORT cte.sql diff --git a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out index a9709c4a79793..a20ae55b1b876 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out @@ -19,11 +19,11 @@ struct<> -- !query -SET spark.sql.legacy.ctePrecedence.enabled=true +SET spark.sql.legacy.ctePrecedencePolicy=legacy -- !query schema struct -- !query output -spark.sql.legacy.ctePrecedence.enabled true +spark.sql.legacy.ctePrecedencePolicy legacy -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index 1d50aa8f57505..6486fba18d20f 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -207,7 +207,7 @@ SELECT * FROM t2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query @@ -226,7 +226,7 @@ SELECT * FROM t2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query @@ -245,7 +245,7 @@ SELECT * FROM t2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query @@ -299,7 +299,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query @@ -314,7 +314,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query @@ -330,7 +330,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query