Skip to content

Commit 0515f49

Browse files
committed
[SPARK-34856][SQL] ANSI mode: Allow casting complex types as string type
### What changes were proposed in this pull request?

Allow casting complex types as string type in ANSI mode.

### Why are the changes needed?

Currently, complex types are not allowed to cast as string type. This breaks the DataFrame.show() API. E.g.

```
scala> sql("select array(1, 2, 2)").show(false)
org.apache.spark.sql.AnalysisException: cannot resolve 'CAST(`array(1, 2, 2)` AS STRING)' due to data type mismatch: cannot cast array<int> to string with ANSI mode on.
```

We should allow the conversion as the extension of the ANSI SQL standard, so that the DataFrame.show() still works in ANSI mode.

### Does this PR introduce _any_ user-facing change?

Yes, casting complex types as string type is now allowed in ANSI mode.

### How was this patch tested?

Unit tests.

Closes #31954 from gengliangwang/fixExplicitCast.

Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
1 parent 0d91f9c commit 0515f49

3 files changed

Lines changed: 119 additions & 127 deletions

File tree

docs/sql-ref-ansi-compliance.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ The type conversion of Spark ANSI mode follows the syntax rules of section 6.13
7676
straightforward type conversions which are disallowed as per the ANSI standard:
7777
* NumericType <=> BooleanType
7878
* StringType <=> BinaryType
79+
* ArrayType => String
80+
* MapType => String
81+
* StructType => String
7982

8083
The valid combinations of target data type and source data type in a `CAST` expression are given by the following table.
8184
"Y" indicates that the combination is syntactically valid without restriction and "N" indicates that the combination is not valid.
@@ -89,9 +92,9 @@ The type conversion of Spark ANSI mode follows the syntax rules of section 6.13
8992
| Interval | N | Y | N | N | Y | N | N | N | N | N |
9093
| Boolean | Y | Y | N | N | N | Y | N | N | N | N |
9194
| Binary | N | Y | N | N | N | N | Y | N | N | N |
92-
| Array | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> | N | N |
93-
| Map | N | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> | N |
94-
| Struct | N | N | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> |
95+
| Array | N | Y | N | N | N | N | N | <span style="color:red">**Y**</span> | N | N |
96+
| Map | N | Y | N | N | N | N | N | N | <span style="color:red">**Y**</span> | N |
97+
| Struct | N | Y | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> |
9598

9699
In the table above, all the `CAST`s that can cause runtime exceptions are marked as red <span style="color:red">**Y**</span>:
97100
* CAST(Numeric AS Numeric): raise an overflow exception if the value is out of the target data type's range.

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1873,6 +1873,8 @@ object AnsiCast {
18731873

18741874
case (NullType, _) => true
18751875

1876+
case (_, StringType) => true
1877+
18761878
case (StringType, _: BinaryType) => true
18771879

18781880
case (StringType, BooleanType) => true
@@ -1890,13 +1892,6 @@ object AnsiCast {
18901892
case (StringType, _: NumericType) => true
18911893
case (BooleanType, _: NumericType) => true
18921894

1893-
case (_: NumericType, StringType) => true
1894-
case (_: DateType, StringType) => true
1895-
case (_: TimestampType, StringType) => true
1896-
case (_: CalendarIntervalType, StringType) => true
1897-
case (BooleanType, StringType) => true
1898-
case (BinaryType, StringType) => true
1899-
19001895
case (ArrayType(fromType, fn), ArrayType(toType, tn)) =>
19011896
canCast(fromType, toType) &&
19021897
resolvableNullability(fn || forceNullable(fromType, toType), tn)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala

Lines changed: 111 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,117 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
686686
checkEvaluation(cast(value, DoubleType), Double.NaN)
687687
}
688688
}
689+
690+
test("SPARK-22825 Cast array to string") {
691+
val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType)
692+
checkEvaluation(ret1, "[1, 2, 3, 4, 5]")
693+
val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType)
694+
checkEvaluation(ret2, "[ab, cde, f]")
695+
Seq(false, true).foreach { omitNull =>
696+
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) {
697+
val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType)
698+
checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]")
699+
}
700+
}
701+
val ret4 =
702+
cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), StringType)
703+
checkEvaluation(ret4, "[ab, cde, f]")
704+
val ret5 = cast(
705+
Literal.create(Array("2014-12-03", "2014-12-04", "2014-12-06").map(Date.valueOf)),
706+
StringType)
707+
checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]")
708+
val ret6 = cast(
709+
Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00")
710+
.map(Timestamp.valueOf)),
711+
StringType)
712+
checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]")
713+
val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), StringType)
714+
checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]")
715+
val ret8 = cast(
716+
Literal.create(Array(Array(Array("a"), Array("b", "c")), Array(Array("d")))),
717+
StringType)
718+
checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]")
719+
}
720+
721+
test("SPARK-33291: Cast array with null elements to string") {
722+
Seq(false, true).foreach { omitNull =>
723+
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) {
724+
val ret1 = cast(Literal.create(Array(null, null)), StringType)
725+
checkEvaluation(
726+
ret1,
727+
s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]")
728+
}
729+
}
730+
}
731+
732+
test("SPARK-22973 Cast map to string") {
733+
Seq(
734+
false -> ("{", "}"),
735+
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
736+
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
737+
val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), StringType)
738+
checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb")
739+
val ret2 = cast(
740+
Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> "c".getBytes)),
741+
StringType)
742+
checkEvaluation(ret2, s"${lb}1 -> a, 2 ->${if (legacyCast) "" else " null"}, 3 -> c$rb")
743+
val ret3 = cast(
744+
Literal.create(Map(
745+
1 -> Date.valueOf("2014-12-03"),
746+
2 -> Date.valueOf("2014-12-04"),
747+
3 -> Date.valueOf("2014-12-05"))),
748+
StringType)
749+
checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 2014-12-05$rb")
750+
val ret4 = cast(
751+
Literal.create(Map(
752+
1 -> Timestamp.valueOf("2014-12-03 13:01:00"),
753+
2 -> Timestamp.valueOf("2014-12-04 15:05:00"))),
754+
StringType)
755+
checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 15:05:00$rb")
756+
val ret5 = cast(
757+
Literal.create(Map(
758+
1 -> Array(1, 2, 3),
759+
2 -> Array(4, 5, 6))),
760+
StringType)
761+
checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb")
762+
}
763+
}
764+
}
765+
766+
test("SPARK-22981 Cast struct to string") {
767+
Seq(
768+
false -> ("{", "}"),
769+
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
770+
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
771+
val ret1 = cast(Literal.create((1, "a", 0.1)), StringType)
772+
checkEvaluation(ret1, s"${lb}1, a, 0.1$rb")
773+
val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, "a")), StringType)
774+
checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, a$rb")
775+
val ret3 = cast(Literal.create(
776+
(Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 15:05:00"))), StringType)
777+
checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb")
778+
val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType)
779+
checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb")
780+
val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType)
781+
checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb")
782+
val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> "c"))), StringType)
783+
checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb")
784+
}
785+
}
786+
}
787+
788+
test("SPARK-33291: Cast struct with null elements to string") {
789+
Seq(
790+
false -> ("{", "}"),
791+
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
792+
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
793+
val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType)
794+
checkEvaluation(
795+
ret1,
796+
s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb")
797+
}
798+
}
799+
}
689800
}
690801

691802
abstract class AnsiCastSuiteBase extends CastSuiteBase {
@@ -851,12 +962,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase {
851962
assert(cast(booleanLiteral, DateType).checkInputDataTypes().isFailure)
852963
}
853964

854-
test("ANSI mode: disallow casting complex types as String type") {
855-
verifyCastFailure(cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType))
856-
verifyCastFailure(cast(Literal.create(Map(1 -> "a")), StringType))
857-
verifyCastFailure(cast(Literal.create((1, "a", 0.1)), StringType))
858-
}
859-
860965
test("cast from invalid string to numeric should throw NumberFormatException") {
861966
// cast to IntegerType
862967
Seq(IntegerType, ShortType, ByteType, LongType).foreach { dataType =>
@@ -1569,117 +1674,6 @@ class CastSuite extends CastSuiteBase {
15691674
checkEvaluation(cast("abcd", DecimalType(38, 1)), null)
15701675
}
15711676

1572-
test("SPARK-22825 Cast array to string") {
1573-
val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType)
1574-
checkEvaluation(ret1, "[1, 2, 3, 4, 5]")
1575-
val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType)
1576-
checkEvaluation(ret2, "[ab, cde, f]")
1577-
Seq(false, true).foreach { omitNull =>
1578-
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) {
1579-
val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType)
1580-
checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]")
1581-
}
1582-
}
1583-
val ret4 =
1584-
cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), StringType)
1585-
checkEvaluation(ret4, "[ab, cde, f]")
1586-
val ret5 = cast(
1587-
Literal.create(Array("2014-12-03", "2014-12-04", "2014-12-06").map(Date.valueOf)),
1588-
StringType)
1589-
checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]")
1590-
val ret6 = cast(
1591-
Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00")
1592-
.map(Timestamp.valueOf)),
1593-
StringType)
1594-
checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]")
1595-
val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), StringType)
1596-
checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]")
1597-
val ret8 = cast(
1598-
Literal.create(Array(Array(Array("a"), Array("b", "c")), Array(Array("d")))),
1599-
StringType)
1600-
checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]")
1601-
}
1602-
1603-
test("SPARK-33291: Cast array with null elements to string") {
1604-
Seq(false, true).foreach { omitNull =>
1605-
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) {
1606-
val ret1 = cast(Literal.create(Array(null, null)), StringType)
1607-
checkEvaluation(
1608-
ret1,
1609-
s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]")
1610-
}
1611-
}
1612-
}
1613-
1614-
test("SPARK-22973 Cast map to string") {
1615-
Seq(
1616-
false -> ("{", "}"),
1617-
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
1618-
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
1619-
val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), StringType)
1620-
checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb")
1621-
val ret2 = cast(
1622-
Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> "c".getBytes)),
1623-
StringType)
1624-
checkEvaluation(ret2, s"${lb}1 -> a, 2 ->${if (legacyCast) "" else " null"}, 3 -> c$rb")
1625-
val ret3 = cast(
1626-
Literal.create(Map(
1627-
1 -> Date.valueOf("2014-12-03"),
1628-
2 -> Date.valueOf("2014-12-04"),
1629-
3 -> Date.valueOf("2014-12-05"))),
1630-
StringType)
1631-
checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 2014-12-05$rb")
1632-
val ret4 = cast(
1633-
Literal.create(Map(
1634-
1 -> Timestamp.valueOf("2014-12-03 13:01:00"),
1635-
2 -> Timestamp.valueOf("2014-12-04 15:05:00"))),
1636-
StringType)
1637-
checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 15:05:00$rb")
1638-
val ret5 = cast(
1639-
Literal.create(Map(
1640-
1 -> Array(1, 2, 3),
1641-
2 -> Array(4, 5, 6))),
1642-
StringType)
1643-
checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb")
1644-
}
1645-
}
1646-
}
1647-
1648-
test("SPARK-22981 Cast struct to string") {
1649-
Seq(
1650-
false -> ("{", "}"),
1651-
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
1652-
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
1653-
val ret1 = cast(Literal.create((1, "a", 0.1)), StringType)
1654-
checkEvaluation(ret1, s"${lb}1, a, 0.1$rb")
1655-
val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, "a")), StringType)
1656-
checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, a$rb")
1657-
val ret3 = cast(Literal.create(
1658-
(Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 15:05:00"))), StringType)
1659-
checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb")
1660-
val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType)
1661-
checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb")
1662-
val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType)
1663-
checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb")
1664-
val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> "c"))), StringType)
1665-
checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb")
1666-
}
1667-
}
1668-
}
1669-
1670-
test("SPARK-33291: Cast struct with null elements to string") {
1671-
Seq(
1672-
false -> ("{", "}"),
1673-
true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
1674-
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) {
1675-
val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType)
1676-
checkEvaluation(
1677-
ret1,
1678-
s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb")
1679-
}
1680-
}
1681-
}
1682-
16831677
test("data type casting II") {
16841678
checkEvaluation(
16851679
cast(cast(cast(cast(cast(cast("5", ByteType), TimestampType),

0 commit comments

Comments (0)