Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,21 @@ import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, With}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.LEGACY_CTE_PRECEDENCE_ENABLED
import org.apache.spark.sql.internal.SQLConf.{LEGACY_CTE_PRECEDENCE_POLICY, LegacyBehaviorPolicy}

/**
* Analyze WITH nodes and substitute child plan with CTE definitions.
*/
object CTESubstitution extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = {
val isLegacy = SQLConf.get.getConf(LEGACY_CTE_PRECEDENCE_ENABLED)
if (isLegacy.isEmpty) {
assertNoNameConflictsInCTE(plan, inTraverse = false)
traverseAndSubstituteCTE(plan, inTraverse = false)
} else if (isLegacy.get) {
legacyTraverseAndSubstituteCTE(plan)
} else {
traverseAndSubstituteCTE(plan, inTraverse = false)
LegacyBehaviorPolicy.withName(SQLConf.get.getConf(LEGACY_CTE_PRECEDENCE_POLICY)) match {
case LegacyBehaviorPolicy.EXCEPTION =>
assertNoNameConflictsInCTE(plan, inTraverse = false)
traverseAndSubstituteCTE(plan, inTraverse = false)
case LegacyBehaviorPolicy.LEGACY =>
legacyTraverseAndSubstituteCTE(plan)
case LegacyBehaviorPolicy.NEW_BEHAVIOR =>
traverseAndSubstituteCTE(plan, inTraverse = false)
}
}

Expand All @@ -54,8 +54,8 @@ object CTESubstitution extends Rule[LogicalPlan] {
case (cteName, _) =>
if (cteNames.contains(cteName)) {
throw new AnalysisException(s"Name $cteName is ambiguous in nested CTE. " +
s"Please set ${LEGACY_CTE_PRECEDENCE_ENABLED.key} to false so that name defined " +
"in inner CTE takes precedence. See more details in SPARK-28228.")
s"Please set ${LEGACY_CTE_PRECEDENCE_POLICY.key} to NEW_BEHAVIOR so that name " +
"defined in inner CTE takes precedence. See more details in SPARK-28228.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since it's enum now, maybe also explain what will happen if set it to legacy.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, done in 47edd88.

} else {
cteName
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2106,13 +2106,19 @@ object SQLConf {
.booleanConf
.createWithDefault(false)

val LEGACY_CTE_PRECEDENCE_ENABLED = buildConf("spark.sql.legacy.ctePrecedence.enabled")
object LegacyBehaviorPolicy extends Enumeration {
val EXCEPTION, LEGACY, NEW_BEHAVIOR = Value
Copy link
Member

@viirya viirya Feb 14, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NEW_BEHAVIOR sounds not a good name. INNER_FIRST?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or INNER_PRECEDENCE?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the advice @viirya. Yes, both INNER_FIRST and INNER_PRECEDENCE are more accurate in this PR, but I also want to make this enum as a common config value, which can be reused in other similar issues. WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about CORRECTED to indicate that it's the corrected behavior?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, change to CORRECTED in 47edd88

}

val LEGACY_CTE_PRECEDENCE_POLICY = buildConf("spark.sql.legacy.ctePrecedencePolicy")
.internal()
.doc("When true, outer CTE definitions takes precedence over inner definitions. If set to " +
"false, inner CTE definitions take precedence. The default value is empty, " +
.doc("When LEGACY, outer CTE definitions takes precedence over inner definitions. If set to " +
"NEW_BEHAVIOR, inner CTE definitions take precedence. The default value is EXCEPTION, " +
"AnalysisException is thrown while name conflict is detected in nested CTE.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's add one more sentence: This config will be removed in future versions and CORRECTED will be the only behavior.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, added in bc70f75.

.booleanConf
.createOptional
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(LegacyBehaviorPolicy.values.map(_.toString))
.createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)

val LEGACY_ARRAY_EXISTS_FOLLOWS_THREE_VALUED_LOGIC =
buildConf("spark.sql.legacy.arrayExistsFollowsThreeValuedLogic")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ create temporary view t as select * from values 0, 1, 2 as t(id);
create temporary view t2 as select * from values 0, 1 as t(id);

-- CTE legacy substitution
SET spark.sql.legacy.ctePrecedence.enabled=true;
SET spark.sql.legacy.ctePrecedencePolicy=legacy;

-- CTE in CTE definition
WITH t as (
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
--SET spark.sql.legacy.ctePrecedence.enabled = false
--SET spark.sql.legacy.ctePrecedencePolicy = new_behavior
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

corrected

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, done in bc70f75.

--IMPORT cte.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ struct<>


-- !query
SET spark.sql.legacy.ctePrecedence.enabled=true
SET spark.sql.legacy.ctePrecedencePolicy=legacy
-- !query schema
struct<key:string,value:string>
-- !query output
spark.sql.legacy.ctePrecedence.enabled true
spark.sql.legacy.ctePrecedencePolicy legacy


-- !query
Expand Down
12 changes: 6 additions & 6 deletions sql/core/src/test/resources/sql-tests/results/cte.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ SELECT * FROM t2
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to NEW_BEHAVIOR so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;


-- !query
Expand All @@ -226,7 +226,7 @@ SELECT * FROM t2
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to NEW_BEHAVIOR so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;


-- !query
Expand All @@ -245,7 +245,7 @@ SELECT * FROM t2
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to NEW_BEHAVIOR so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;


-- !query
Expand Down Expand Up @@ -299,7 +299,7 @@ SELECT (
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to NEW_BEHAVIOR so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;


-- !query
Expand All @@ -314,7 +314,7 @@ SELECT (
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to NEW_BEHAVIOR so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;


-- !query
Expand All @@ -330,7 +330,7 @@ SELECT (
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;
Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to NEW_BEHAVIOR so that name defined in inner CTE takes precedence. See more details in SPARK-28228.;


-- !query
Expand Down