SqlBase.g4
@@ -474,7 +474,7 @@ identifierComment

relationPrimary
: tableIdentifier sample? tableAlias #tableName
-    | '(' queryNoWith ')' sample? (AS? strictIdentifier)? #aliasedQuery
+    | '(' queryNoWith ')' sample? tableAlias #aliasedQuery
| '(' relation ')' sample? (AS? strictIdentifier)? #aliasedRelation
| inlineTable #inlineTableDefault2
| functionTable #tableValuedFunction
Analyzer.scala
@@ -859,6 +859,22 @@ class Analyzer(
// rule: ResolveDeserializer.
case plan if containsDeserializer(plan.expressions) => plan

case u @ UnresolvedSubqueryColumnAliases(columnNames, child) if child.resolved =>
[Review comment, Member] One last question: why is this not a separate analyzer rule?

[Member Author] Yeah, I forgot to move this rule out; I'll update soon.

// Resolves output attributes if a query has alias names in its subquery:
// e.g., SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2)
val outputAttrs = child.output
// Checks that the number of aliases matches the number of output columns
// in the subquery.
if (columnNames.size != outputAttrs.size) {
u.failAnalysis("Number of column aliases does not match number of columns. " +
s"Number of column aliases: ${columnNames.size}; " +
s"number of columns: ${outputAttrs.size}.")
}
val aliases = outputAttrs.zip(columnNames).map { case (attr, aliasName) =>
Alias(attr, aliasName)()
}
Project(aliases, child)

case q: LogicalPlan =>
logTrace(s"Attempting to resolve ${q.simpleString}")
q.transformExpressionsUp {
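For readers tracing the new analyzer case above: it pairs the subquery's output attributes with the user-supplied names by position, after checking that the counts match. A minimal standalone sketch of that step, using an illustrative helper name and simplified types rather than Spark's actual classes:

// Illustrative sketch only: pair each output column with its user-supplied alias by
// position, failing when the counts differ, just as the analyzer case above does.
def aliasByPosition[A](outputColumns: Seq[A], aliasNames: Seq[String]): Seq[(A, String)] = {
  require(aliasNames.size == outputColumns.size,
    s"Number of column aliases: ${aliasNames.size}; number of columns: ${outputColumns.size}.")
  outputColumns.zip(aliasNames)
}

// aliasByPosition(Seq("a", "b"), Seq("col1", "col2"))
// => Seq(("a", "col1"), ("b", "col2"))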
unresolved.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode}
import org.apache.spark.sql.catalyst.parser.ParserUtils
-import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, UnaryNode}
import org.apache.spark.sql.catalyst.trees.TreeNode
import org.apache.spark.sql.catalyst.util.quoteIdentifier
import org.apache.spark.sql.types.{DataType, Metadata, StructType}
@@ -422,6 +422,27 @@ case class UnresolvedAlias(
override lazy val resolved = false
}

/**
* Aliased column names, resolved by position, for a subquery. We could add alias names for the
* output columns in the subquery:
* {{{
* // Assign alias names for output columns
* SELECT col1, col2 FROM testData AS t(col1, col2);
* }}}
*
* @param outputColumnNames the alias names to be assigned, by position, to the output columns of the subquery.
* @param child the logical plan of this subquery.
[Review comment, Member] Nit: the logical plan of this subquery -> the [[LogicalPlan]] on which this subquery column aliases apply
*/
case class UnresolvedSubqueryColumnAliases(
outputColumnNames: Seq[String],
child: LogicalPlan)
extends UnaryNode {

override def output: Seq[Attribute] = Nil

override lazy val resolved = false
}

/**
* Holds the deserializer expression and the attributes that are available during the resolution
* for it. Deserializer expression is a special kind of expression that is not always resolved by
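As a usage note, the new node is only an unresolved placeholder emitted by the parser: it records the user-supplied column names together with the aliased subquery, and the analyzer case above rewrites it into a Project of Aliases. A hedged sketch of constructing it by hand, mirroring the two-argument SubqueryAlias usage shown in the test suites later in this patch:

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{UnresolvedRelation, UnresolvedSubqueryColumnAliases}
import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias

// Roughly what the parser produces for `... FROM (SELECT ... FROM t) t(col1, col2)`;
// the node stays unresolved (output = Nil) until the analyzer assigns the aliases.
val node = UnresolvedSubqueryColumnAliases(
  Seq("col1", "col2"),
  SubqueryAlias("t", UnresolvedRelation(TableIdentifier("t"))))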
AstBuilder.scala
@@ -750,20 +750,28 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
/**
* Create an alias (SubqueryAlias) for a sub-query. This is practically the same as
* visitAliasedRelation and visitNamedExpression, ANTLR4 however requires us to use 3 different
-   * hooks.
+   * hooks. We could add alias names for output columns, for example:
* {{{
* SELECT col1, col2 FROM testData AS t(col1, col2)
* }}}
*/
override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) {
-    val alias = if (ctx.strictIdentifier == null) {
+    val alias = if (ctx.tableAlias.strictIdentifier == null) {
// For un-aliased subqueries, use a default alias name that is not likely to conflict with
// normal subquery names, so that parent operators can only access the columns in subquery by
// unqualified names. Users can still use this special qualifier to access columns if they
// know it, but that's not recommended.
"__auto_generated_subquery_name"
} else {
-      ctx.strictIdentifier.getText
+      ctx.tableAlias.strictIdentifier.getText
}
val subquery = SubqueryAlias(alias, plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample))
if (ctx.tableAlias.identifierList != null) {
val columnAliases = visitIdentifierList(ctx.tableAlias.identifierList)
UnresolvedSubqueryColumnAliases(columnAliases, subquery)
} else {
subquery
}

-    SubqueryAlias(alias, plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample))
}

/**
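With the grammar and AstBuilder changes in place, the Catalyst parser should emit the new node for a column-aliased subquery. A quick way to eyeball this, assuming a build that includes this patch:

import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

// Parse only (no analysis); the printed tree should contain
// UnresolvedSubqueryColumnAliases [col1, col2] above the SubqueryAlias `t`.
val parsed = CatalystSqlParser.parsePlan(
  "SELECT * FROM (SELECT a AS x, b AS y FROM t) t(col1, col2)")
println(parsed.treeString)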
AnalysisSuite.scala
@@ -470,4 +470,24 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers {
Seq("Number of column aliases does not match number of columns. Table name: TaBlE3; " +
"number of column aliases: 5; number of columns: 4."))
}

test("SPARK-20962 Support subquery column aliases in FROM clause") {
def tableColumnsWithAliases(outputNames: Seq[String]): LogicalPlan = {
UnresolvedSubqueryColumnAliases(
outputNames,
SubqueryAlias(
"t",
UnresolvedRelation(TableIdentifier("TaBlE3")))
).select(star())
}
assertAnalysisSuccess(tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: Nil))
assertAnalysisError(
tableColumnsWithAliases("col1" :: Nil),
Seq("Number of column aliases does not match number of columns. " +
"Number of column aliases: 1; number of columns: 4."))
assertAnalysisError(
tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: "col5" :: Nil),
Seq("Number of column aliases does not match number of columns. " +
"Number of column aliases: 5; number of columns: 4."))
}
}
PlanParserSuite.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.parser

import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
-import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedTableValuedFunction}
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
@@ -495,6 +495,17 @@ class PlanParserSuite extends AnalysisTest {
.select(star()))
}

test("SPARK-20962 Support subquery column aliases in FROM clause") {
assertEqual(
"SELECT * FROM (SELECT a AS x, b AS y FROM t) t(col1, col2)",
UnresolvedSubqueryColumnAliases(
Seq("col1", "col2"),
SubqueryAlias(
"t",
UnresolvedRelation(TableIdentifier("t")).select('a.as("x"), 'b.as("y")))
).select(star()))
}

test("inline table") {
assertEqual("values 1, 2, 3, 4",
UnresolvedInlineTable(Seq("col1"), Seq(1, 2, 3, 4).map(x => Seq(Literal(x)))))
SQL test input (table aliases)
@@ -15,3 +15,6 @@ SELECT * FROM testData AS t(col1);

-- Check alias duplication
SELECT a AS col1, b AS col2 FROM testData AS t(c, d);

-- Subquery aliases in FROM clause
SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2);
SQL test golden output (table aliases)
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 7
+-- Number of queries: 8


-- !query 0
@@ -61,3 +61,11 @@ struct<>
-- !query 6 output
org.apache.spark.sql.AnalysisException
cannot resolve '`a`' given input columns: [t.c, t.d]; line 1 pos 7


-- !query 7
SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2)
-- !query 7 schema
struct<col1:int,col2:int>
-- !query 7 output
1 1
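For an end-to-end check outside the golden file above, the same query can be run from spark-shell (a sketch, assuming a Spark build that includes this patch; `spark` is the SparkSession predefined by the shell):

val df = spark.sql("SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2)")
df.printSchema()  // expect the subquery's columns renamed to col1 and col2
df.show()         // expect a single row: 1, 1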