-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-32282][SQL] Improve EnsureRquirement.reorderJoinKeys to handle more scenarios such as PartitioningCollection #29074
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
a0366f2
99493e4
ab237bc
8308649
268326b
d91bcdd
e5b078f
89ad6ef
fa3aafa
1729c8b
e2f7e44
10b4d5a
3cd6df9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -130,9 +130,14 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { | |
| leftKeys: IndexedSeq[Expression], | ||
| rightKeys: IndexedSeq[Expression], | ||
| expectedOrderOfKeys: Seq[Expression], | ||
| currentOrderOfKeys: Seq[Expression]): (Seq[Expression], Seq[Expression]) = { | ||
| currentOrderOfKeys: Seq[Expression]): Option[(Seq[Expression], Seq[Expression])] = { | ||
| if (expectedOrderOfKeys.size != currentOrderOfKeys.size) { | ||
| return (leftKeys, rightKeys) | ||
| return None | ||
| } | ||
|
|
||
| // Check if the current order already satisfies the expected order. | ||
| if (expectedOrderOfKeys.zip(currentOrderOfKeys).forall(p => p._1.semanticEquals(p._2))) { | ||
| return Some(leftKeys, rightKeys) | ||
| } | ||
|
|
||
| // Build a lookup between an expression and the positions its holds in the current key seq. | ||
|
|
@@ -159,10 +164,10 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { | |
| rightKeysBuffer += rightKeys(index) | ||
| case _ => | ||
| // The expression cannot be found, or we have exhausted all indices for that expression. | ||
| return (leftKeys, rightKeys) | ||
| return None | ||
| } | ||
| } | ||
| (leftKeysBuffer, rightKeysBuffer) | ||
| Some(leftKeysBuffer, rightKeysBuffer) | ||
| } | ||
|
|
||
| private def reorderJoinKeys( | ||
|
|
@@ -171,19 +176,50 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { | |
| leftPartitioning: Partitioning, | ||
| rightPartitioning: Partitioning): (Seq[Expression], Seq[Expression]) = { | ||
| if (leftKeys.forall(_.deterministic) && rightKeys.forall(_.deterministic)) { | ||
| (leftPartitioning, rightPartitioning) match { | ||
| case (HashPartitioning(leftExpressions, _), _) => | ||
| reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, leftExpressions, leftKeys) | ||
| case (_, HashPartitioning(rightExpressions, _)) => | ||
| reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, rightExpressions, rightKeys) | ||
| case _ => | ||
| (leftKeys, rightKeys) | ||
| } | ||
| reorderJoinKeysRecursively(leftKeys, rightKeys, leftPartitioning, rightPartitioning) | ||
| .getOrElse((leftKeys, rightKeys)) | ||
| } else { | ||
| (leftKeys, rightKeys) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Recursively reorders the join keys based on partitioning. It starts reordering the | ||
| * join keys to match HashPartitioning on either side, followed by PartitioningCollection. | ||
| */ | ||
| private def reorderJoinKeysRecursively( | ||
| leftKeys: Seq[Expression], | ||
| rightKeys: Seq[Expression], | ||
| leftPartitioning: Partitioning, | ||
| rightPartitioning: Partitioning): Option[(Seq[Expression], Seq[Expression])] = { | ||
| (leftPartitioning, rightPartitioning) match { | ||
| case (HashPartitioning(leftExpressions, _), _) => | ||
| reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, leftExpressions, leftKeys) | ||
| .orElse(reorderJoinKeysRecursively( | ||
| leftKeys, rightKeys, UnknownPartitioning(0), rightPartitioning)) | ||
| case (_, HashPartitioning(rightExpressions, _)) => | ||
| reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, rightExpressions, rightKeys) | ||
| .orElse(reorderJoinKeysRecursively( | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can be also implemented by looking at left partitioning first then move to the right partitionoing: (leftPartitioning, rightPartitioning) match {
case (HashPartitioning(leftExpressions, _), _) =>
reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, leftExpressions, leftKeys)
.orElse(reorderJoinKeysRecursively(
leftKeys, rightKeys, UnknownPartitioning(0), rightPartitioning))
case (PartitioningCollection(partitionings), _) =>
partitionings.foreach { p =>
reorderJoinKeysRecursively(leftKeys, rightKeys, p, rightPartitioning).map { k =>
return Some(k)
}
}
reorderJoinKeysRecursively(leftKeys, rightKeys, UnknownPartitioning(0), rightPartitioning)
case (_, HashPartitioning(rightExpressions, _)) =>
reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, rightExpressions, rightKeys)
case (_, PartitioningCollection(partitionings)) =>
partitionings.foreach { p =>
reorderJoinKeysRecursively(leftKeys, rightKeys, leftPartitioning, p).map { k =>
return Some(k)
}
}
None
case _ =>
None
}However, I chose this way so that the behavior remains the same. If you have |
||
| leftKeys, rightKeys, leftPartitioning, UnknownPartitioning(0))) | ||
| case (PartitioningCollection(partitionings), _) => | ||
| partitionings.foreach { p => | ||
| reorderJoinKeysRecursively(leftKeys, rightKeys, p, rightPartitioning).map { k => | ||
| return Some(k) | ||
|
||
| } | ||
| } | ||
| reorderJoinKeysRecursively(leftKeys, rightKeys, UnknownPartitioning(0), rightPartitioning) | ||
| case (_, PartitioningCollection(partitionings)) => | ||
| partitionings.foreach { p => | ||
|
||
| reorderJoinKeysRecursively(leftKeys, rightKeys, leftPartitioning, p).map { k => | ||
| return Some(k) | ||
| } | ||
| } | ||
| None | ||
| case _ => | ||
| None | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * When the physical operators are created for JOIN, the ordering of join keys is based on order | ||
| * in which the join keys appear in the user query. That might not match with the output | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.