Skip to content
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ valueExpression
: primaryExpression #valueExpressionDefault
| operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary
| left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary
| left=valueExpression operator=(PLUS | MINUS) right=valueExpression #arithmeticBinary
| left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary
| left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary
| left=valueExpression operator=HAT right=valueExpression #arithmeticBinary
| left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary
Expand Down Expand Up @@ -582,7 +582,7 @@ comparisonOperator
;

arithmeticOperator
: PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | HAT
: PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT
;

predicateOperator
Expand Down Expand Up @@ -861,6 +861,7 @@ DIV: 'DIV';
TILDE: '~';
AMPERSAND: '&';
PIPE: '|';
CONCAT_PIPE: '||';
HAT: '^';

PERCENTLIT: 'PERCENT';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf)
CollapseRepartition,
CollapseProject,
CollapseWindow,
CollapseConcat,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not part of Operator combine. Maybe move it to the spot around SimplifyCasts

CombineFilters,
CombineLimits,
CombineUnions,
Expand Down Expand Up @@ -608,6 +609,31 @@ object CollapseWindow extends Rule[LogicalPlan] {
}
}

/**
* Collapse nested [[Concat]] expressions.
*/
object CollapseConcat extends Rule[LogicalPlan] {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please move it to org.apache.spark.sql.catalyst.optimizer.expressions.scala


private def extractConcatExprs(e: Concat): Seq[Expression] = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tail recursion? or using queue/stack?

e.children.foldLeft(mutable.ArrayBuffer[Expression]()) { case (exprList, e) =>
exprList ++= (e match {
case concat: Concat => extractConcatExprs(concat)
case _ => e :: Nil
})
}
}

def apply(plan: LogicalPlan): LogicalPlan = plan.transform {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plan transformAllExpressions ?

case p @ Project(exprs, _) if exprs.exists(_.collect { case _: Concat => true }.size > 1) =>
val projectList = exprs.map { expr =>
expr.transformDown {
case concat: Concat => Concat(extractConcatExprs(concat))
}
}.asInstanceOf[Seq[NamedExpression]]
p.copy(projectList = projectList)
}
}

/**
* Generate a list of additional filters from an operator's existing constraint but remove those
* that are either already part of the operator's condition or are part of the operator's child
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -998,6 +998,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
Add(left, right)
case SqlBaseParser.MINUS =>
Subtract(left, right)
case SqlBaseParser.CONCAT_PIPE =>
Concat(left :: right :: Nil)
case SqlBaseParser.AMPERSAND =>
BitwiseAnd(left, right)
case SqlBaseParser.HAT =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,11 @@ select 1 - 2;
select 2 * 5;
select 5 % 3;
select pmod(-7, 3);

-- check operator precedence
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add the precedence rules we follow in the comments?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

EXPLAIN SELECT 'a' || 1 + 2;
EXPLAIN SELECT 1 - 2 || 'b';
EXPLAIN SELECT 2 * 4 + 3 || 'b';
EXPLAIN SELECT 3 + 1 || 'a' || 4 / 2;
EXPLAIN SELECT 1 == 1 OR 'a' || 'b' == 'ab';
EXPLAIN SELECT 'a' || 'c' == 'ac' AND 2 == 3;
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
-- Argument number exception
select concat_ws();
select format_string();

-- A pipe operator for string concatenation
SELECT 'a' || 'b';

-- Check if catalyst collapses multiple `Concat`s
EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col
FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10));
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 28
-- Number of queries: 34


-- !query 0
Expand Down Expand Up @@ -224,3 +224,63 @@ select pmod(-7, 3)
struct<pmod(-7, 3):int>
-- !query 27 output
2


-- !query 28
EXPLAIN SELECT 'a' || 1 + 2
-- !query 28 schema
struct<plan:string>
-- !query 28 output
== Physical Plan ==
*Project [null AS (CAST(concat(a, CAST(1 AS STRING)) AS DOUBLE) + CAST(2 AS DOUBLE))#x]
+- Scan OneRowRelation[]


-- !query 29
EXPLAIN SELECT 1 - 2 || 'b'
-- !query 29 schema
struct<plan:string>
-- !query 29 output
== Physical Plan ==
*Project [-1b AS concat(CAST((1 - 2) AS STRING), b)#x]
+- Scan OneRowRelation[]


-- !query 30
EXPLAIN SELECT 2 * 4 + 3 || 'b'
-- !query 30 schema
struct<plan:string>
-- !query 30 output
== Physical Plan ==
*Project [11b AS concat(CAST(((2 * 4) + 3) AS STRING), b)#x]
+- Scan OneRowRelation[]


-- !query 31
EXPLAIN SELECT 3 + 1 || 'a' || 4 / 2
-- !query 31 schema
struct<plan:string>
-- !query 31 output
== Physical Plan ==
*Project [4a2.0 AS concat(concat(CAST((3 + 1) AS STRING), a), CAST((CAST(4 AS DOUBLE) / CAST(2 AS DOUBLE)) AS STRING))#x]
+- Scan OneRowRelation[]


-- !query 32
EXPLAIN SELECT 1 == 1 OR 'a' || 'b' == 'ab'
-- !query 32 schema
struct<plan:string>
-- !query 32 output
== Physical Plan ==
*Project [true AS ((1 = 1) OR (concat(a, b) = ab))#x]
+- Scan OneRowRelation[]


-- !query 33
EXPLAIN SELECT 'a' || 'c' == 'ac' AND 2 == 3
-- !query 33 schema
struct<plan:string>
-- !query 33 output
== Physical Plan ==
*Project [false AS ((concat(a, c) = ac) AND (2 = 3))#x]
+- Scan OneRowRelation[]
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 2
-- Number of queries: 4


-- !query 0
Expand All @@ -18,3 +18,37 @@ struct<>
-- !query 1 output
org.apache.spark.sql.AnalysisException
requirement failed: format_string() should take at least 1 argument; line 1 pos 7


-- !query 2
SELECT 'a' || 'b'
-- !query 2 schema
struct<concat(a, b):string>
-- !query 2 output
ab


-- !query 3
EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col
FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10))
-- !query 3 schema
struct<plan:string>
-- !query 3 output
== Parsed Logical Plan ==
'Project [concat(concat(concat('col1, 'col2), 'col3), 'col4) AS col#x]
+- 'Project ['id AS col1#x, 'id AS col2#x, 'id AS col3#x, 'id AS col4#x]
+- 'UnresolvedTableValuedFunction range, [10]

== Analyzed Logical Plan ==
col: string
Project [concat(concat(concat(cast(col1#xL as string), cast(col2#xL as string)), cast(col3#xL as string)), cast(col4#xL as string)) AS col#x]
+- Project [id#xL AS col1#xL, id#xL AS col2#xL, id#xL AS col3#xL, id#xL AS col4#xL]
+- Range (0, 10, step=1, splits=None)

== Optimized Logical Plan ==
Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)) AS col#x]
+- Range (0, 10, step=1, splits=None)

== Physical Plan ==
*Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)) AS col#x]
+- *Range (0, 10, step=1, splits=2)
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ package org.apache.spark.sql.execution

import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedStar}
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar}
import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder}
import org.apache.spark.sql.catalyst.expressions.{Ascending, Concat, SortOrder}
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, RepartitionByExpression, Sort}
Expand Down Expand Up @@ -290,4 +290,15 @@ class SparkSqlParserSuite extends PlanTest {
basePlan,
numPartitions = newConf.numShufflePartitions)))
}

test("pipeline concatenation") {
val concat = Concat(
Concat(UnresolvedAttribute("a") :: UnresolvedAttribute("b") :: Nil) ::
UnresolvedAttribute("c") ::
Nil
)
assertEqual(
"SELECT a || b || c FROM t",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it is tricky to combine sequential Concat in parser, maybe we can do it in optimizer later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

aha, I see. WDYT, @gatorsmile ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I prefer to simpler codes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, I'll try to add a new rule for that. Thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I am thinking is a follow PR to add the rule.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea, I also feel so. Is it okay to remove the rule from this pr? @gatorsmile. If ok, I'll fix the points you reviewed in follow-up.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am fine

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reverted, thanks

Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t"))))
}
}