From aff9b95da004abc3d530bedadacfd99025fa6480 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 24 May 2017 08:12:03 +0900 Subject: [PATCH 1/4] Support column aliases for catalog tables --- .../spark/sql/catalyst/parser/SqlBase.g4 | 8 ++- .../sql/catalyst/analysis/Analyzer.scala | 18 ++++++- .../sql/catalyst/analysis/unresolved.scala | 11 +++- .../sql/catalyst/parser/AstBuilder.scala | 11 ++-- .../sql/catalyst/analysis/AnalysisSuite.scala | 14 +++++ .../sql/catalyst/analysis/AnalysisTest.scala | 1 + .../sql/catalyst/parser/PlanParserSuite.scala | 11 +++- .../parser/TableIdentifierParserSuite.scala | 2 +- .../resources/sql-tests/inputs/aliases.sql | 14 +++++ .../sql-tests/results/aliases.sql.out | 54 +++++++++++++++++++ .../sql-tests/results/operators.sql.out | 2 +- .../benchmark/TPCDSQueryBenchmark.scala | 4 +- .../apache/spark/sql/hive/test/TestHive.scala | 2 +- 13 files changed, 137 insertions(+), 15 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/aliases.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/aliases.sql.out diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index dc11e536efc45..8753a9a958a29 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -472,13 +472,17 @@ identifierComment ; relationPrimary - : tableIdentifier sample? (AS? strictIdentifier)? #tableName + : catalogTable #tableName | '(' queryNoWith ')' sample? (AS? strictIdentifier) #aliasedQuery | '(' relation ')' sample? (AS? strictIdentifier)? #aliasedRelation | inlineTable #inlineTableDefault2 | functionTable #tableValuedFunction ; +catalogTable + : tableIdentifier sample? tableAlias + ; + inlineTable : VALUES expression (',' expression)* tableAlias ; @@ -711,7 +715,7 @@ nonReserved | ADD | OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT | ROW | LAST | FIRST | AFTER | MAP | ARRAY | STRUCT - | LATERAL | WINDOW | REDUCE | TRANSFORM | USING | SERDE | SERDEPROPERTIES | RECORDREADER + | LATERAL | WINDOW | REDUCE | TRANSFORM | SERDE | SERDEPROPERTIES | RECORDREADER | DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED | EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | GLOBAL | TEMPORARY | OPTIONS | GROUPING | CUBE | ROLLUP diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 85cf8ddbaacf4..993180d37ae2f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -593,7 +593,23 @@ class Analyzer( def resolveRelation(plan: LogicalPlan): LogicalPlan = plan match { case u: UnresolvedRelation if !isRunningDirectlyOnFiles(u.tableIdentifier) => val defaultDatabase = AnalysisContext.get.defaultDatabase - val relation = lookupTableFromCatalog(u, defaultDatabase) + val foundRelation = lookupTableFromCatalog(u, defaultDatabase) + + // If alias names assigned, add `Project` with the aliases + val relation = if (u.outputNames.nonEmpty) { + val outputAttrs = foundRelation.output + // Checks if the number of the aliases is equal to expected one + if (u.outputNames.size != outputAttrs.size) { + u.failAnalysis(s"expected ${outputAttrs.size} columns but found " + + s"${u.outputNames.size} columns in alias names") + } + val aliases = outputAttrs.zip(u.outputNames).map { + case (attr, name) => Alias(attr, name)() + } + Project(aliases, foundRelation) + } else { + foundRelation + } resolveRelation(relation) // The view's child should be a logical plan parsed from the `desc.viewText`, the variable // `viewText` should be defined, or else we throw an error on the generation of the View diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 51bef6e20b9fa..4a5eecd029071 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -36,8 +36,17 @@ class UnresolvedException[TreeType <: TreeNode[_]](tree: TreeType, function: Str /** * Holds the name of a relation that has yet to be looked up in a catalog. + * We could add alias names for columns in a relation: + * {{{ + * // Assign alias names + * SELECT col1, col2 FROM testData AS t(col1, col2); + * }}} */ -case class UnresolvedRelation(tableIdentifier: TableIdentifier) extends LeafNode { +case class UnresolvedRelation( + tableIdentifier: TableIdentifier, + outputNames: Seq[String] = Seq.empty) + extends LeafNode { + /** Returns a `.` separated name for this relation. */ def tableName: String = tableIdentifier.unquotedString diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 7d2e3a6fe7580..05ecc54e65d6d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -676,14 +676,17 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Create an aliased table reference. This is typically used in FROM clauses. */ override def visitTableName(ctx: TableNameContext): LogicalPlan = withOrigin(ctx) { - val table = UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier)) - - val tableWithAlias = Option(ctx.strictIdentifier).map(_.getText) match { + val tableId = visitTableIdentifier(ctx.catalogTable.tableIdentifier) + val table = Option(ctx.catalogTable.tableAlias.identifierList) match { + case Some(aliases) => UnresolvedRelation(tableId, visitIdentifierList(aliases)) + case _ => UnresolvedRelation(tableId) + } + val tableWithAlias = Option(ctx.catalogTable.tableAlias.strictIdentifier).map(_.getText) match { case Some(strictIdentifier) => SubqueryAlias(strictIdentifier, table) case _ => table } - tableWithAlias.optionalMap(ctx.sample)(withSample) + tableWithAlias.optionalMap(ctx.catalogTable.sample)(withSample) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 7eccca2e85649..decd9bb1043a2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -467,4 +467,18 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers { rangeWithAliases(3 :: Nil, "a" :: "b" :: Nil), Seq("expected 1 columns but found 2 columns")) } + + test("SPARK-20841 Support table column aliases in FROM clause") { + def tableColumnsWithAliases(outputNames: Seq[String]): LogicalPlan = { + SubqueryAlias("t", UnresolvedRelation(TableIdentifier("TaBlE3"), outputNames)) + .select(star()) + } + assertAnalysisSuccess(tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: Nil)) + assertAnalysisError( + tableColumnsWithAliases("col1" :: Nil), + Seq("expected 4 columns but found 1 columns in alias names")) + assertAnalysisError( + tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: "col5" :: Nil), + Seq("expected 4 columns but found 5 columns in alias names")) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala index 82015b1e0671c..afc7ce4195a8b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala @@ -35,6 +35,7 @@ trait AnalysisTest extends PlanTest { val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true) catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true) + catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true) new Analyzer(catalog, conf) { override val extendedResolutionRules = EliminateSubqueryAliases :: Nil } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 134e761460881..7a5357eef8f94 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -17,8 +17,8 @@ package org.apache.spark.sql.catalyst.parser -import org.apache.spark.sql.catalyst.FunctionIdentifier -import org.apache.spark.sql.catalyst.analysis.{UnresolvedGenerator, UnresolvedInlineTable, UnresolvedTableValuedFunction} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedTableValuedFunction} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -493,6 +493,13 @@ class PlanParserSuite extends PlanTest { .select(star())) } + test("SPARK-20841 Support table column aliases in FROM clause") { + assertEqual( + "SELECT * FROM testData AS t(col1, col2)", + SubqueryAlias("t", UnresolvedRelation(TableIdentifier("testData"), Seq("col1", "col2"))) + .select(star())) + } + test("inline table") { assertEqual("values 1, 2, 3, 4", UnresolvedInlineTable(Seq("col1"), Seq(1, 2, 3, 4).map(x => Seq(Literal(x))))) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index 170c469197e73..f33abc5b2e049 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -49,7 +49,7 @@ class TableIdentifierParserSuite extends SparkFunSuite { "insert", "int", "into", "is", "lateral", "like", "local", "none", "null", "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke", "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger", - "true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike", + "true", "truncate", "update", "user", "values", "with", "regexp", "rlike", "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float", "int", "smallint", "timestamp", "at") diff --git a/sql/core/src/test/resources/sql-tests/inputs/aliases.sql b/sql/core/src/test/resources/sql-tests/inputs/aliases.sql new file mode 100644 index 0000000000000..47e9faab70a74 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/aliases.sql @@ -0,0 +1,14 @@ +-- Test data. +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS t(a, b); + +-- Table column aliases in FROM clause +SELECT * FROM testData AS t(col1, col2) WHERE col1 = 1; + +SELECT * FROM testData AS t(col1, col2) WHERE col1 = 2; + +SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k; + +-- Aliasing the wrong number of columns in the FROM clause +SELECT * FROM testData AS t(col1, col2, col3); + +SELECT * FROM testData AS t(col1); diff --git a/sql/core/src/test/resources/sql-tests/results/aliases.sql.out b/sql/core/src/test/resources/sql-tests/results/aliases.sql.out new file mode 100644 index 0000000000000..ef6a6e966b5d2 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/aliases.sql.out @@ -0,0 +1,54 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 6 + + +-- !query 0 +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS t(a, b) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT * FROM testData AS t(col1, col2) WHERE col1 = 1 +-- !query 1 schema +struct +-- !query 1 output +1 1 +1 2 + + +-- !query 2 +SELECT * FROM testData AS t(col1, col2) WHERE col1 = 2 +-- !query 2 schema +struct +-- !query 2 output +2 1 + + +-- !query 3 +SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k +-- !query 3 schema +struct +-- !query 3 output +1 3 +2 1 + + +-- !query 4 +SELECT * FROM testData AS t(col1, col2, col3) +-- !query 4 schema +struct<> +-- !query 4 output +org.apache.spark.sql.AnalysisException +expected 2 columns but found 3 columns in alias names; line 1 pos 14 + + +-- !query 5 +SELECT * FROM testData AS t(col1) +-- !query 5 schema +struct<> +-- !query 5 output +org.apache.spark.sql.AnalysisException +expected 2 columns but found 1 columns in alias names; line 1 pos 14 diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index 28cfb744193ec..4c6af4dfde00c 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 45 +-- Number of queries: 48 -- !query 0 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index a6249ce021400..a23c87514d3a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -74,13 +74,13 @@ object TPCDSQueryBenchmark { // per-row processing time for those cases. val queryRelations = scala.collection.mutable.HashSet[String]() spark.sql(queryString).queryExecution.logical.map { - case ur @ UnresolvedRelation(t: TableIdentifier) => + case ur @ UnresolvedRelation(t: TableIdentifier, _) => queryRelations.add(t.table) case lp: LogicalPlan => lp.expressions.foreach { _ foreach { case subquery: SubqueryExpression => subquery.plan.foreach { - case ur @ UnresolvedRelation(t: TableIdentifier) => + case ur @ UnresolvedRelation(t: TableIdentifier, _) => queryRelations.add(t.table) case _ => } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index ee9ac21a738dc..e1534c797d55b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -544,7 +544,7 @@ private[hive] class TestHiveQueryExecution( // Make sure any test tables referenced are loaded. val referencedTables = describedTables ++ - logical.collect { case UnresolvedRelation(tableIdent) => tableIdent.table } + logical.collect { case UnresolvedRelation(tableIdent, _) => tableIdent.table } val referencedTestTables = referencedTables.filter(sparkSession.testTables.contains) logDebug(s"Query references test tables: ${referencedTestTables.mkString(", ")}") referencedTestTables.foreach(sparkSession.loadTestTable) From 51dc8962c0cf29cbff60b5490ea2f5ad8a6e108a Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Thu, 25 May 2017 10:10:50 +0900 Subject: [PATCH 2/4] Apply xiao's review --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 6 +----- .../spark/sql/catalyst/parser/AstBuilder.scala | 8 ++++---- .../src/test/resources/sql-tests/inputs/aliases.sql | 5 ++++- .../resources/sql-tests/results/aliases.sql.out | 13 +++++++++++-- .../execution/benchmark/TPCDSQueryBenchmark.scala | 4 ++-- 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 8753a9a958a29..547013c23fd78 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -472,17 +472,13 @@ identifierComment ; relationPrimary - : catalogTable #tableName + : tableIdentifier sample? tableAlias #tableName | '(' queryNoWith ')' sample? (AS? strictIdentifier) #aliasedQuery | '(' relation ')' sample? (AS? strictIdentifier)? #aliasedRelation | inlineTable #inlineTableDefault2 | functionTable #tableValuedFunction ; -catalogTable - : tableIdentifier sample? tableAlias - ; - inlineTable : VALUES expression (',' expression)* tableAlias ; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 05ecc54e65d6d..1f8a96b1c583d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -676,17 +676,17 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Create an aliased table reference. This is typically used in FROM clauses. */ override def visitTableName(ctx: TableNameContext): LogicalPlan = withOrigin(ctx) { - val tableId = visitTableIdentifier(ctx.catalogTable.tableIdentifier) - val table = Option(ctx.catalogTable.tableAlias.identifierList) match { + val tableId = visitTableIdentifier(ctx.tableIdentifier) + val table = Option(ctx.tableAlias.identifierList) match { case Some(aliases) => UnresolvedRelation(tableId, visitIdentifierList(aliases)) case _ => UnresolvedRelation(tableId) } - val tableWithAlias = Option(ctx.catalogTable.tableAlias.strictIdentifier).map(_.getText) match { + val tableWithAlias = Option(ctx.tableAlias.strictIdentifier).map(_.getText) match { case Some(strictIdentifier) => SubqueryAlias(strictIdentifier, table) case _ => table } - tableWithAlias.optionalMap(ctx.catalogTable.sample)(withSample) + tableWithAlias.optionalMap(ctx.sample)(withSample) } /** diff --git a/sql/core/src/test/resources/sql-tests/inputs/aliases.sql b/sql/core/src/test/resources/sql-tests/inputs/aliases.sql index 47e9faab70a74..c90a9c7f85587 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/aliases.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/aliases.sql @@ -1,5 +1,5 @@ -- Test data. -CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS t(a, b); +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS testData(a, b); -- Table column aliases in FROM clause SELECT * FROM testData AS t(col1, col2) WHERE col1 = 1; @@ -12,3 +12,6 @@ SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k; SELECT * FROM testData AS t(col1, col2, col3); SELECT * FROM testData AS t(col1); + +-- Check alias duplication +SELECT a AS col1, b AS col2 FROM testData AS t(c, d); diff --git a/sql/core/src/test/resources/sql-tests/results/aliases.sql.out b/sql/core/src/test/resources/sql-tests/results/aliases.sql.out index ef6a6e966b5d2..3bd55dae9e870 100644 --- a/sql/core/src/test/resources/sql-tests/results/aliases.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/aliases.sql.out @@ -1,9 +1,9 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 6 +-- Number of queries: 7 -- !query 0 -CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS t(a, b) +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS testData(a, b) -- !query 0 schema struct<> -- !query 0 output @@ -52,3 +52,12 @@ struct<> -- !query 5 output org.apache.spark.sql.AnalysisException expected 2 columns but found 1 columns in alias names; line 1 pos 14 + + +-- !query 6 +SELECT a AS col1, b AS col2 FROM testData AS t(c, d) +-- !query 6 schema +struct<> +-- !query 6 output +org.apache.spark.sql.AnalysisException +cannot resolve '`a`' given input columns: [c, d]; line 1 pos 7 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index a23c87514d3a5..6a5b74b01df80 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -74,13 +74,13 @@ object TPCDSQueryBenchmark { // per-row processing time for those cases. val queryRelations = scala.collection.mutable.HashSet[String]() spark.sql(queryString).queryExecution.logical.map { - case ur @ UnresolvedRelation(t: TableIdentifier, _) => + case UnresolvedRelation(t: TableIdentifier, _) => queryRelations.add(t.table) case lp: LogicalPlan => lp.expressions.foreach { _ foreach { case subquery: SubqueryExpression => subquery.plan.foreach { - case ur @ UnresolvedRelation(t: TableIdentifier, _) => + case UnresolvedRelation(t: TableIdentifier, _) => queryRelations.add(t.table) case _ => } From 49186ac60175d3c5ddde33f4120864b834860ba0 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sun, 28 May 2017 18:15:05 +0900 Subject: [PATCH 3/4] Apply more review comments --- .../spark/sql/catalyst/analysis/Analyzer.scala | 16 +++++++++------- .../analysis/ResolveTableValuedFunctions.scala | 5 +++-- .../spark/sql/catalyst/analysis/unresolved.scala | 11 ++++++++++- .../sql/catalyst/analysis/AnalysisSuite.scala | 9 ++++++--- .../resources/sql-tests/results/aliases.sql.out | 4 ++-- .../sql-tests/results/operators.sql.out | 2 +- 6 files changed, 31 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 993180d37ae2f..8818404094eb1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -595,15 +595,17 @@ class Analyzer( val defaultDatabase = AnalysisContext.get.defaultDatabase val foundRelation = lookupTableFromCatalog(u, defaultDatabase) - // If alias names assigned, add `Project` with the aliases - val relation = if (u.outputNames.nonEmpty) { + // Add `Project` to rename output column names if a query has alias names: + // e.g., SELECT col1, col2 FROM testData AS t(col1, col2) + val relation = if (u.outputColumnNames.nonEmpty) { val outputAttrs = foundRelation.output - // Checks if the number of the aliases is equal to expected one - if (u.outputNames.size != outputAttrs.size) { - u.failAnalysis(s"expected ${outputAttrs.size} columns but found " + - s"${u.outputNames.size} columns in alias names") + // Checks if the number of the aliases equals to the number of columns in the table. + if (u.outputColumnNames.size != outputAttrs.size) { + u.failAnalysis(s"Number of column aliases does not match number of columns. " + + s"Table name: ${u.tableName}; number of column aliases: " + + s"${u.outputColumnNames.size}; number of columns: ${outputAttrs.size}.") } - val aliases = outputAttrs.zip(u.outputNames).map { + val aliases = outputAttrs.zip(u.outputColumnNames).map { case (attr, name) => Alias(attr, name)() } Project(aliases, foundRelation) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala index 40675359bec47..a214e59302cd9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala @@ -131,8 +131,9 @@ object ResolveTableValuedFunctions extends Rule[LogicalPlan] { val outputAttrs = resolvedFunc.output // Checks if the number of the aliases is equal to expected one if (u.outputNames.size != outputAttrs.size) { - u.failAnalysis(s"expected ${outputAttrs.size} columns but " + - s"found ${u.outputNames.size} columns") + u.failAnalysis(s"Number of given aliases does not match number of output columns. " + + s"Function name: ${u.functionName}; number of aliases: " + + s"${u.outputNames.size}; number of output columns: ${outputAttrs.size}.") } val aliases = outputAttrs.zip(u.outputNames).map { case (attr, name) => Alias(attr, name)() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 4a5eecd029071..42b9641bef276 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -41,10 +41,14 @@ class UnresolvedException[TreeType <: TreeNode[_]](tree: TreeType, function: Str * // Assign alias names * SELECT col1, col2 FROM testData AS t(col1, col2); * }}} + * + * @param tableIdentifier table name + * @param outputColumnNames alias names of columns. If these names given, an analyzer adds + * [[Project]] to rename the columns. */ case class UnresolvedRelation( tableIdentifier: TableIdentifier, - outputNames: Seq[String] = Seq.empty) + outputColumnNames: Seq[String] = Seq.empty) extends LeafNode { /** Returns a `.` separated name for this relation. */ @@ -80,6 +84,11 @@ case class UnresolvedInlineTable( * // Assign alias names * select t.a from range(10) t(a); * }}} + * + * @param functionName name of this table-value function + * @param functionArgs list of function arguments + * @param outputNames alias names of function output columns. If these names given, an analyzer + * adds [[Project]] to rename the output columns. */ case class UnresolvedTableValuedFunction( functionName: String, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index decd9bb1043a2..5393786891e07 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -465,7 +465,8 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers { assertAnalysisSuccess(rangeWithAliases(2 :: 6 :: 2 :: Nil, "c" :: Nil)) assertAnalysisError( rangeWithAliases(3 :: Nil, "a" :: "b" :: Nil), - Seq("expected 1 columns but found 2 columns")) + Seq("Number of given aliases does not match number of output columns. " + + "Function name: range; number of aliases: 2; number of output columns: 1.")) } test("SPARK-20841 Support table column aliases in FROM clause") { @@ -476,9 +477,11 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers { assertAnalysisSuccess(tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: Nil)) assertAnalysisError( tableColumnsWithAliases("col1" :: Nil), - Seq("expected 4 columns but found 1 columns in alias names")) + Seq("Number of column aliases does not match number of columns. Table name: TaBlE3; " + + "number of column aliases: 1; number of columns: 4.")) assertAnalysisError( tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: "col5" :: Nil), - Seq("expected 4 columns but found 5 columns in alias names")) + Seq("Number of column aliases does not match number of columns. Table name: TaBlE3; " + + "number of column aliases: 5; number of columns: 4.")) } } diff --git a/sql/core/src/test/resources/sql-tests/results/aliases.sql.out b/sql/core/src/test/resources/sql-tests/results/aliases.sql.out index 3bd55dae9e870..c318018dced29 100644 --- a/sql/core/src/test/resources/sql-tests/results/aliases.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/aliases.sql.out @@ -42,7 +42,7 @@ SELECT * FROM testData AS t(col1, col2, col3) struct<> -- !query 4 output org.apache.spark.sql.AnalysisException -expected 2 columns but found 3 columns in alias names; line 1 pos 14 +Number of column aliases does not match number of columns. Table name: testData; number of column aliases: 3; number of columns: 2.; line 1 pos 14 -- !query 5 @@ -51,7 +51,7 @@ SELECT * FROM testData AS t(col1) struct<> -- !query 5 output org.apache.spark.sql.AnalysisException -expected 2 columns but found 1 columns in alias names; line 1 pos 14 +Number of column aliases does not match number of columns. Table name: testData; number of column aliases: 1; number of columns: 2.; line 1 pos 14 -- !query 6 diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index 4c6af4dfde00c..28cfb744193ec 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 48 +-- Number of queries: 45 -- !query 0 From 8b505c265170984137d4bb9edf669e0101bd4ea2 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 29 May 2017 01:17:23 +0900 Subject: [PATCH 4/4] Apply Herman's reviews --- .../spark/sql/catalyst/parser/AstBuilder.scala | 15 ++++++++------- .../inputs/{aliases.sql => table-aliases.sql} | 0 .../{aliases.sql.out => table-aliases.sql.out} | 0 3 files changed, 8 insertions(+), 7 deletions(-) rename sql/core/src/test/resources/sql-tests/inputs/{aliases.sql => table-aliases.sql} (100%) rename sql/core/src/test/resources/sql-tests/results/{aliases.sql.out => table-aliases.sql.out} (100%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 1f8a96b1c583d..5f34d0777d5a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -677,14 +677,15 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging */ override def visitTableName(ctx: TableNameContext): LogicalPlan = withOrigin(ctx) { val tableId = visitTableIdentifier(ctx.tableIdentifier) - val table = Option(ctx.tableAlias.identifierList) match { - case Some(aliases) => UnresolvedRelation(tableId, visitIdentifierList(aliases)) - case _ => UnresolvedRelation(tableId) + val table = if (ctx.tableAlias.identifierList != null) { + UnresolvedRelation(tableId, visitIdentifierList(ctx.tableAlias.identifierList)) + } else { + UnresolvedRelation(tableId) } - val tableWithAlias = Option(ctx.tableAlias.strictIdentifier).map(_.getText) match { - case Some(strictIdentifier) => - SubqueryAlias(strictIdentifier, table) - case _ => table + val tableWithAlias = if (ctx.tableAlias.strictIdentifier != null) { + SubqueryAlias(ctx.tableAlias.strictIdentifier.getText, table) + } else { + table } tableWithAlias.optionalMap(ctx.sample)(withSample) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/aliases.sql b/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql similarity index 100% rename from sql/core/src/test/resources/sql-tests/inputs/aliases.sql rename to sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql diff --git a/sql/core/src/test/resources/sql-tests/results/aliases.sql.out b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out similarity index 100% rename from sql/core/src/test/resources/sql-tests/results/aliases.sql.out rename to sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out