-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-19951][SQL] Add string concatenate operator || to Spark SQL #17711
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
5544ff2
05d490e
afcd950
f89d131
5957545
cb4b26e
8890b94
df3869a
c88652c
96db575
de89791
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,6 +87,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) | |
| CollapseRepartition, | ||
| CollapseProject, | ||
| CollapseWindow, | ||
| CollapseConcat, | ||
| CombineFilters, | ||
| CombineLimits, | ||
| CombineUnions, | ||
|
|
@@ -608,6 +609,31 @@ object CollapseWindow extends Rule[LogicalPlan] { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Collapses nested [[Concat]] expressions into a single [[Concat]], e.g. | ||
| * `Concat(Concat(a, b), c)` is rewritten to `Concat(a, b, c)`. | ||
| * | ||
| * The rewrite is applied to the expressions of every operator in the plan rather than only | ||
| * to [[Project]] lists, so concatenations that appear in filter predicates, join conditions, | ||
| * aggregates, etc. are collapsed as well. | ||
| */ | ||
| object CollapseConcat extends Rule[LogicalPlan] { | ||
|   /** | ||
|    * Returns the operands of `e` in left-to-right order, with the operands of every nested | ||
|    * [[Concat]] (at any depth) spliced in place of the nested node itself. | ||
|    */ | ||
|   private def extractConcatExprs(e: Concat): Seq[Expression] = e.children.flatMap { | ||
|     case nested: Concat => extractConcatExprs(nested) | ||
|     case operand => operand :: Nil | ||
|   } | ||
|   /** True when at least one direct child of `e` is itself a [[Concat]]. */ | ||
|   private def hasNestedConcat(e: Concat): Boolean = e.children.exists { | ||
|     case _: Concat => true | ||
|     case _ => false | ||
|   } | ||
|   def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressions { | ||
|     // Only rewrite when there is something to flatten, so that untouched expressions keep | ||
|     // their identity. The traversal is top-down: the outermost Concat absorbs all nested | ||
|     // ones in a single step, leaving no Concat among its new children. | ||
|     case concat: Concat if hasNestedConcat(concat) => Concat(extractConcatExprs(concat)) | ||
|   } | ||
| } | ||
|
|
||
| /** | ||
| * Generate a list of additional filters from an operator's existing constraint but remove those | ||
| * that are either already part of the operator's condition or are part of the operator's child | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,3 +32,11 @@ select 1 - 2; | |
| select 2 * 5; | ||
| select 5 % 3; | ||
| select pmod(-7, 3); | ||
|
|
||
| -- Check operator precedence: each query below combines the string concatenation operator | ||
| -- with arithmetic, comparison, or logical operators and is run under EXPLAIN. | ||
|
||
| EXPLAIN SELECT 'a' || 1 + 2; | ||
| EXPLAIN SELECT 1 - 2 || 'b'; | ||
| EXPLAIN SELECT 2 * 4 + 3 || 'b'; | ||
| EXPLAIN SELECT 3 + 1 || 'a' || 4 / 2; | ||
| EXPLAIN SELECT 1 == 1 OR 'a' || 'b' == 'ab'; | ||
| EXPLAIN SELECT 'a' || 'c' == 'ac' AND 2 == 3; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,10 @@ | ||
| -- Argument number exception: both functions are invoked with no arguments | ||
| select concat_ws(); | ||
| select format_string(); | ||
|
|
||
| -- The pipe operator for string concatenation, applied to two string literals | ||
| SELECT 'a' || 'b'; | ||
|
|
||
| -- Check whether Catalyst collapses multiple `Concat`s: four columns are chained with the | ||
| -- pipe operator and the query is run under EXPLAIN EXTENDED | ||
| EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col | ||
| FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,9 +19,9 @@ package org.apache.spark.sql.execution | |
|
|
||
| import org.apache.spark.sql.SaveMode | ||
| import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} | ||
| import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} | ||
| import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} | ||
| import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType} | ||
| import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder} | ||
| import org.apache.spark.sql.catalyst.expressions.{Ascending, Concat, SortOrder} | ||
| import org.apache.spark.sql.catalyst.parser.ParseException | ||
| import org.apache.spark.sql.catalyst.plans.PlanTest | ||
| import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, RepartitionByExpression, Sort} | ||
|
|
@@ -290,4 +290,15 @@ class SparkSqlParserSuite extends PlanTest { | |
| basePlan, | ||
| numPartitions = newConf.numShufflePartitions))) | ||
| } | ||
|
|
||
| test("pipeline concatenation") { | ||
| val concat = Concat( | ||
| Concat(UnresolvedAttribute("a") :: UnresolvedAttribute("b") :: Nil) :: | ||
| UnresolvedAttribute("c") :: | ||
| Nil | ||
| ) | ||
| assertEqual( | ||
| "SELECT a || b || c FROM t", | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it is tricky to combine sequential
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. aha, I see. WDYT, @gatorsmile ?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. I prefer simpler code.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, I'll try to add a new rule for that. Thanks!
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, I am thinking of a follow-up PR to add the rule.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I feel the same way. Is it okay to remove the rule from this PR, @gatorsmile? If so, I'll address the points you reviewed in a follow-up.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am fine
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. reverted, thanks |
||
| Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t")))) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not part of the
`Operator combine` group. Maybe move it to the spot around `SimplifyCasts`.