-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-12503] [SQL] Pushing Limit Through Union All #10451
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 32 commits
01e4cdf
6835704
9180687
b38a21e
d2b84af
fda8025
ac0dccd
6e0018b
0546772
b37a64f
661260b
2dfa0fd
d929d9b
4070d2f
38dcfb2
cb3fc83
8dbacc7
41b9172
56fd782
b5ac8d7
77105e3
7f25d91
ae59f42
3ccf3bd
004ed66
6998ec9
09a5672
358d62e
2823a57
10d570c
cfbeea7
ca5c104
56f0c16
62d5cbe
7cf955f
7899312
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -90,6 +90,13 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { | |
| Statistics(sizeInBytes = children.map(_.statistics.sizeInBytes).product) | ||
| } | ||
|
|
||
| /** | ||
| * Returns the limited number of rows to be returned. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Specify that any operator that a
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And, thus, we should fix
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. Thanks!
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually we will push down
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, we need to override the function in both directions. Let me update the comments. Thanks! The value of |
||
| * | ||
| * Any operator that a Limit can be pushed passed should override this function. | ||
| */ | ||
| def maxRows: Option[Expression] = None | ||
|
|
||
| /** | ||
| * Returns true if this expression and all its children have been resolved to a specific schema | ||
| * and false if it still contains any unresolved placeholders. Implementations of LogicalPlan | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.catalyst.optimizer | ||
|
|
||
| import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries | ||
| import org.apache.spark.sql.catalyst.plans.PlanTest | ||
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
| import org.apache.spark.sql.catalyst.rules._ | ||
| import org.apache.spark.sql.catalyst.dsl.plans._ | ||
| import org.apache.spark.sql.catalyst.dsl.expressions._ | ||
|
|
||
| class PushdownLimitsSuite extends PlanTest { | ||
| object Optimize extends RuleExecutor[LogicalPlan] { | ||
| val batches = | ||
| Batch("Subqueries", Once, | ||
| EliminateSubQueries) :: | ||
| Batch("Push Down Limit", Once, | ||
| PushDownLimit, | ||
| CombineLimits, | ||
| ConstantFolding, | ||
| BooleanSimplification) :: Nil | ||
| } | ||
|
|
||
| val testRelation = LocalRelation('a.int, 'b.int, 'c.int) | ||
| val testRelation2 = LocalRelation('d.int, 'e.int, 'f.int) | ||
|
|
||
| test("Union: limit to each side") { | ||
| val unionQuery = Union(testRelation, testRelation2).limit(1) | ||
| val unionOptimized = Optimize.execute(unionQuery.analyze) | ||
| val unionCorrectAnswer = | ||
| Limit(1, Union(testRelation.limit(1), testRelation2.limit(1))).analyze | ||
| comparePlans(unionOptimized, unionCorrectAnswer) | ||
| } | ||
|
|
||
| test("Union: limit to each side with the new limit number") { | ||
| val testLimitUnion = Union(testRelation, testRelation2.limit(3)) | ||
| val unionQuery = testLimitUnion.limit(1) | ||
| val unionOptimized = Optimize.execute(unionQuery.analyze) | ||
| val unionCorrectAnswer = | ||
| Limit(1, Union(testRelation.limit(1), testRelation2.limit(1))).analyze | ||
| comparePlans(unionOptimized, unionCorrectAnswer) | ||
| } | ||
|
|
||
| test("Union: no limit to both sides") { | ||
| val testLimitUnion = Union(testRelation.limit(2), testRelation2.select('d).limit(3)) | ||
| val unionQuery = testLimitUnion.limit(2) | ||
| val unionOptimized = Optimize.execute(unionQuery.analyze) | ||
| val unionCorrectAnswer = | ||
| Limit(2, Union(testRelation.limit(2), testRelation2.select('d).limit(3))).analyze | ||
| comparePlans(unionOptimized, unionCorrectAnswer) | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, but why not break this into two parts. So that we push to the left when the left is not limited and we push to the right when the right is not limited. Now you push to both sides if either is not limited.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, you are right. : )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should we also check the limit value? If the
maxRowsis larger than the limit we wanna push down, seems it still makes sense to push it down?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, that also makes sense. Will do the change after these three running test cases. : )