Skip to content

Commit f44ead8

Browse files
aokolnychyi authored and gatorsmile committed
[SPARK-21538][SQL] Attribute resolution inconsistency in the Dataset API
## What changes were proposed in this pull request?

This PR contains a tiny update that removes an attribute resolution inconsistency in the Dataset API. The following example is taken from the ticket description:

```
spark.range(1).withColumnRenamed("id", "x").sort(col("id"))  // works
spark.range(1).withColumnRenamed("id", "x").sort($"id")      // works
spark.range(1).withColumnRenamed("id", "x").sort('id)        // works
spark.range(1).withColumnRenamed("id", "x").sort("id")       // fails with:
  org.apache.spark.sql.AnalysisException: Cannot resolve column name "id" among (x);
```

The above `AnalysisException` happens because the last case calls `Dataset.apply()` to convert strings into columns, which triggers attribute resolution. To make the API consistent between overloaded methods, this PR defers the resolution and constructs columns directly.

Author: aokolnychyi <anton.okolnychyi@sap.com>

Closes #18740 from aokolnychyi/spark-21538.
1 parent 9f5647d commit f44ead8

2 files changed

Lines changed: 14 additions & 1 deletion

File tree

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1108,7 +1108,7 @@ class Dataset[T] private[sql](
11081108
*/
11091109
@scala.annotation.varargs
11101110
def sort(sortCol: String, sortCols: String*): Dataset[T] = {
1111-
sort((sortCol +: sortCols).map(apply) : _*)
1111+
sort((sortCol +: sortCols).map(Column(_)) : _*)
11121112
}
11131113

11141114
/**

sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,19 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
13041304
assert(rlike3.count() == 0)
13051305
}
13061306
}
1307+
1308+
test("SPARK-21538: Attribute resolution inconsistency in Dataset API") {
1309+
val df = spark.range(3).withColumnRenamed("id", "x")
1310+
val expected = Row(0) :: Row(1) :: Row (2) :: Nil
1311+
checkAnswer(df.sort("id"), expected)
1312+
checkAnswer(df.sort(col("id")), expected)
1313+
checkAnswer(df.sort($"id"), expected)
1314+
checkAnswer(df.sort('id), expected)
1315+
checkAnswer(df.orderBy("id"), expected)
1316+
checkAnswer(df.orderBy(col("id")), expected)
1317+
checkAnswer(df.orderBy($"id"), expected)
1318+
checkAnswer(df.orderBy('id), expected)
1319+
}
13071320
}
13081321

13091322
case class WithImmutableMap(id: String, map_test: scala.collection.immutable.Map[Long, String])

0 commit comments

Comments (0)