-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-53738][SQL] PlannedWrite should preserve custom sort order when query output contains literal #52474
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-53738][SQL] PlannedWrite should preserve custom sort order when query output contains literal #52474
Changes from 4 commits
a0aa9f4
2a9613b
2a7361c
8fdb230
376e2b6
d439be1
ad09914
0430f18
2b1f8a5
9ace506
7ee4d92
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans | |
| import scala.collection.mutable | ||
|
|
||
| import org.apache.spark.sql.catalyst.SQLConfHelper | ||
| import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeSet, Empty2Null, Expression, NamedExpression, SortOrder} | ||
| import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Attribute, AttributeSet, Empty2Null, Expression, NamedExpression, SortOrder} | ||
| import org.apache.spark.sql.internal.SQLConf | ||
|
|
||
| /** | ||
|
|
@@ -128,6 +128,9 @@ trait AliasAwareQueryOutputOrdering[T <: QueryPlan[T]] | |
| } | ||
| } | ||
| } | ||
| newOrdering.takeWhile(_.isDefined).flatten.toSeq | ||
| newOrdering.takeWhile(_.isDefined).flatten.toSeq ++ outputExpressions.filter { | ||
| case Alias(child, _) => child.foldable | ||
| case expr => expr.foldable | ||
| }.map(SortOrder(_, Ascending).copy(isConstant = true)) | ||
|
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,10 +63,23 @@ trait V1WriteCommandSuiteBase extends SQLTestUtils with AdaptiveSparkPlanHelper | |
| hasLogicalSort: Boolean, | ||
| orderingMatched: Boolean, | ||
| hasEmpty2Null: Boolean = false)(query: => Unit): Unit = { | ||
| var optimizedPlan: LogicalPlan = null | ||
| executeAndCheckOrderingAndCustomValidate( | ||
| hasLogicalSort, orderingMatched, hasEmpty2Null)(query)(_ => ()) | ||
| } | ||
|
|
||
| /** | ||
| * Execute a write query and check ordering of the plan, then do custom validation | ||
| */ | ||
| protected def executeAndCheckOrderingAndCustomValidate( | ||
| hasLogicalSort: Boolean, | ||
| orderingMatched: Boolean, | ||
| hasEmpty2Null: Boolean = false)(query: => Unit)( | ||
| customValidate: LogicalPlan => Unit): Unit = { | ||
| @volatile var optimizedPlan: LogicalPlan = null | ||
|
|
||
| val listener = new QueryExecutionListener { | ||
| override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { | ||
| val conf = qe.sparkSession.sessionState.conf | ||
|
Member
Author
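There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a bugfix: the listener runs in another thread, so without this change (marking `optimizedPlan` as `@volatile`) the value assigned inside the listener may not be visible to the test thread that later reads it. |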
||
| qe.optimizedPlan match { | ||
| case w: V1WriteCommand => | ||
| if (hasLogicalSort && conf.getConf(SQLConf.PLANNED_WRITE_ENABLED)) { | ||
|
|
@@ -87,7 +100,8 @@ trait V1WriteCommandSuiteBase extends SQLTestUtils with AdaptiveSparkPlanHelper | |
|
|
||
| // Check whether the output ordering is matched before FileFormatWriter executes rdd. | ||
| assert(FileFormatWriter.outputOrderingMatched == orderingMatched, | ||
| s"Expect: $orderingMatched, Actual: ${FileFormatWriter.outputOrderingMatched}") | ||
| s"Expect orderingMatched: $orderingMatched, " + | ||
| s"Actual: ${FileFormatWriter.outputOrderingMatched}") | ||
|
|
||
| sparkContext.listenerBus.waitUntilEmpty() | ||
|
|
||
|
|
@@ -103,6 +117,8 @@ trait V1WriteCommandSuiteBase extends SQLTestUtils with AdaptiveSparkPlanHelper | |
| assert(empty2nullExpr == hasEmpty2Null, | ||
| s"Expect hasEmpty2Null: $hasEmpty2Null, Actual: $empty2nullExpr. Plan:\n$optimizedPlan") | ||
|
|
||
| customValidate(optimizedPlan) | ||
|
|
||
| spark.listenerManager.unregister(listener) | ||
| } | ||
| } | ||
|
|
@@ -228,8 +244,8 @@ class V1WriteCommandSuite extends QueryTest with SharedSparkSession with V1Write | |
| case s: SortExec => s | ||
| }.exists { | ||
| case SortExec(Seq( | ||
| SortOrder(AttributeReference("key", IntegerType, _, _), Ascending, NullsFirst, _), | ||
| SortOrder(AttributeReference("value", StringType, _, _), Ascending, NullsFirst, _) | ||
| SortOrder(AttributeReference("key", IntegerType, _, _), Ascending, NullsFirst, _, _), | ||
| SortOrder(AttributeReference("value", StringType, _, _), Ascending, NullsFirst, _, _) | ||
| ), false, _, _) => true | ||
| case _ => false | ||
| }, plan) | ||
|
|
@@ -275,8 +291,8 @@ class V1WriteCommandSuite extends QueryTest with SharedSparkSession with V1Write | |
| case s: SortExec => s | ||
| }.exists { | ||
| case SortExec(Seq( | ||
| SortOrder(AttributeReference("value", StringType, _, _), Ascending, NullsFirst, _), | ||
| SortOrder(AttributeReference("key", IntegerType, _, _), Ascending, NullsFirst, _) | ||
| SortOrder(AttributeReference("value", StringType, _, _), Ascending, NullsFirst, _, _), | ||
| SortOrder(AttributeReference("key", IntegerType, _, _), Ascending, NullsFirst, _, _) | ||
| ), false, _, _) => true | ||
| case _ => false | ||
| }, plan) | ||
|
|
@@ -391,4 +407,30 @@ class V1WriteCommandSuite extends QueryTest with SharedSparkSession with V1Write | |
| } | ||
| } | ||
| } | ||
|
|
||
| test("v1 write with sort by literal column preserve custom order") { | ||
| withPlannedWrite { _ => | ||
| withTable("t") { | ||
| sql( | ||
| """ | ||
| |CREATE TABLE t(i INT, j INT, k STRING) USING PARQUET | ||
| |PARTITIONED BY (k) | ||
| |""".stripMargin) | ||
| executeAndCheckOrderingAndCustomValidate(hasLogicalSort = true, orderingMatched = true) { | ||
| sql( | ||
| """ | ||
| |INSERT OVERWRITE t | ||
| |SELECT i, j, '0' as k FROM t0 SORT BY k, i | ||
| |""".stripMargin) | ||
| } { optimizedPlan => | ||
| assert { | ||
| optimizedPlan.outputOrdering.exists { | ||
| case SortOrder(attr: AttributeReference, _, _, _, _) => attr.name == "i" | ||
| case _ => false | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.