apache · gengliangwang · Jul 30, 2019 · Jul 30, 2019 · Jul 31, 2019 · Jul 31, 2019
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala
@@ -224,7 +224,8 @@ object Block {
       } else {
         args.foreach {
           case _: ExprValue | _: Inline | _: Block =>
-          case _: Boolean | _: Int | _: Long | _: Float | _: Double | _: String =>
+          case _: Boolean | _: Byte | _: Short| _: Int | _: Long | _: Float | _: Double |
+               _: String =>
           case other => throw new IllegalArgumentException(
             s"Can not interpolate ${other.getClass.getName} into code block.")
         }

diff --git a/...src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala b/...src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala
@@ -285,7 +285,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest {
       GroupingSets(Seq(Seq(), Seq(unresolved_a), Seq(unresolved_a, unresolved_b)),
         Seq(unresolved_a, unresolved_b), r1, Seq(unresolved_a, unresolved_b)))
     val expected = Project(Seq(a, b), Sort(
-      Seq(SortOrder('aggOrder.byte.withNullability(false), Ascending)), true,
+      Seq(SortOrder('aggOrder.byte.withNullability(true), Ascending)), true,
       Aggregate(Seq(a, b, gid),
         Seq(a, b, grouping_a.as("aggOrder")),
         Expand(

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -1023,4 +1023,103 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
       checkEvaluation(ret, InternalRow(null))
     }
   }
+
+  private def testIntMaxAndMin(dt: DataType): Unit = {
+    Seq(Int.MaxValue + 1L, Int.MinValue - 1L).foreach { value =>
+      checkEvaluation(cast(value, dt), null)
+      checkEvaluation(cast(value.toString, dt), null)
+      checkEvaluation(cast(Decimal(value.toString), dt), null)
+      checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), dt), null)
+      checkEvaluation(cast(Literal(value * 2.0f, FloatType), dt), null)
+      checkEvaluation(cast(Literal(value * 1.0, DoubleType), dt), null)
+    }
+  }
+
+  private def testLongMaxAndMin(dt: DataType): Unit = {
+    Seq(Decimal(Long.MaxValue) + Decimal(1), Decimal(Long.MinValue) - Decimal(1)).foreach { value =>
+      checkEvaluation(cast(value.toString, dt), null)
+      checkEvaluation(cast(value, dt), null)
+      checkEvaluation(cast((value * Decimal(1.1)).toFloat, dt), null)
+      checkEvaluation(cast((value * Decimal(1.1)).toDouble, dt), null)
+    }
+  }
+
+  test("Cast to byte") {
+    testIntMaxAndMin(ByteType)
+    Seq(Byte.MaxValue + 1, Byte.MinValue - 1).foreach { value =>
+      checkEvaluation(cast(value, ByteType), null)
+      checkEvaluation(cast(value.toString, ByteType), null)
+      checkEvaluation(cast(Decimal(value.toString), ByteType), null)
+      checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ByteType), null)
+      checkEvaluation(cast(Literal(value, DateType), ByteType), null)
+      checkEvaluation(cast(Literal(value * 1.0f, FloatType), ByteType), null)
+      checkEvaluation(cast(Literal(value * 1.0, DoubleType), ByteType), null)
+    }
+
+    Seq(Byte.MaxValue, 0.toByte, Byte.MinValue).foreach { value =>
+      checkEvaluation(cast(value, ByteType), value)
+      checkEvaluation(cast(value.toString, ByteType), value)
+      checkEvaluation(cast(Decimal(value.toString), ByteType), value)
+      checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ByteType), value)
+      checkEvaluation(cast(Literal(value.toInt, DateType), ByteType), null)
+      checkEvaluation(cast(Literal(value * 1.0f, FloatType), ByteType), value)
+      checkEvaluation(cast(Literal(value * 1.0, DoubleType), ByteType), value)
+    }
+  }
+
+  test("Cast to short") {
+    testIntMaxAndMin(ShortType)
+    Seq(Short.MaxValue + 1, Short.MinValue - 1).foreach { value =>
+      checkEvaluation(cast(value, ShortType), null)
+      checkEvaluation(cast(value.toString, ShortType), null)
+      checkEvaluation(cast(Decimal(value.toString), ShortType), null)
+      checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ShortType), null)
+      checkEvaluation(cast(Literal(value, DateType), ShortType), null)
+      checkEvaluation(cast(Literal(value * 1.0f, FloatType), ShortType), null)
+      checkEvaluation(cast(Literal(value * 1.0, DoubleType), ShortType), null)
+    }
+
+    Seq(Short.MaxValue, 0.toShort, Short.MinValue).foreach { value =>
+      checkEvaluation(cast(value, ShortType), value)
+      checkEvaluation(cast(value.toString, ShortType), value)
+      checkEvaluation(cast(Decimal(value.toString), ShortType), value)
+      checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ShortType), value)
+      checkEvaluation(cast(Literal(value.toInt, DateType), ShortType), null)
+      checkEvaluation(cast(Literal(value * 1.0f, FloatType), ShortType), value)
+      checkEvaluation(cast(Literal(value * 1.0, DoubleType), ShortType), value)
+    }
+  }
+
+  test("Cast to int") {
+    testIntMaxAndMin(IntegerType)
+    testLongMaxAndMin(IntegerType)
+
+    Seq(Int.MaxValue, 0, Int.MinValue).foreach { value =>
+      checkEvaluation(cast(value, IntegerType), value)
+      checkEvaluation(cast(value.toString, IntegerType), value)
+      checkEvaluation(cast(Decimal(value.toString), IntegerType), value)
+      checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), IntegerType), value)
+      checkEvaluation(cast(Literal(value * 1.0, DoubleType), IntegerType), value)
+    }
+    checkEvaluation(cast(2147483647.4f, IntegerType), 2147483647)
+    checkEvaluation(cast(-2147483648.4f, IntegerType), -2147483648)
+    checkEvaluation(cast(2147483647.4D, IntegerType), 2147483647)
+    checkEvaluation(cast(-2147483648.4D, IntegerType), -2147483648)
+  }
+
+  test("Cast to long") {
+    testLongMaxAndMin(LongType)
+
+    Seq(Long.MaxValue, 0, Long.MinValue).foreach { value =>
+      checkEvaluation(cast(value, LongType), value)
+      checkEvaluation(cast(value.toString, LongType), value)
+      checkEvaluation(cast(Decimal(value.toString), LongType), value)
+      checkEvaluation(cast(Literal(value, TimestampType), LongType),
+        Math.floorDiv(value, MICROS_PER_SECOND))
+    }
+    checkEvaluation(cast(9223372036854775807.4f, LongType), 9223372036854775807L)
+    checkEvaluation(cast(-9223372036854775808.4f, LongType), -9223372036854775808L)
+    checkEvaluation(cast(9223372036854775807.4D, LongType), 9223372036854775807L)
+    checkEvaluation(cast(-9223372036854775808.4D, LongType), -9223372036854775808L)
+  }
 }
diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/float4.sql.out
@@ -336,7 +336,7 @@ SELECT int(float('-2147483900'))
 -- !query 37 schema
 struct<CAST(CAST(-2147483900 AS FLOAT) AS INT):int>
 -- !query 37 output
--2147483648
+NULL
 
 
 -- !query 38
@@ -368,7 +368,7 @@ SELECT bigint(float('-9223380000000000000'))
 -- !query 41 schema
 struct<CAST(CAST(-9223380000000000000 AS FLOAT) AS BIGINT):bigint>
 -- !query 41 output
--9223372036854775808
+NULL
 
 
 -- !query 42

diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/float8.sql.out
@@ -828,7 +828,7 @@ SELECT bigint(double('-9223372036854780000'))
 -- !query 93 schema
 struct<CAST(CAST(-9223372036854780000 AS DOUBLE) AS BIGINT):bigint>
 -- !query 93 output
--9223372036854775808
+NULL
 
 
 -- !query 94

diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/int8.sql.out
@@ -606,10 +606,10 @@ SELECT CAST(q1 AS int) FROM int8_tbl WHERE q2 <> 456
 -- !query 60 schema
 struct<q1:int>
 -- !query 60 output
--869367531
--869367531
--869367531
 123
+NULL
+NULL
+NULL
 
 
 -- !query 61
@@ -625,10 +625,10 @@ SELECT CAST(q1 AS smallint) FROM int8_tbl WHERE q2 <> 456
 -- !query 62 schema
 struct<q1:smallint>
 -- !query 62 output
--32491
--32491
--32491
 123
+NULL
+NULL
+NULL
 
 
 -- !query 63
@@ -664,7 +664,7 @@ SELECT CAST(double('922337203685477580700.0') AS bigint)
 -- !query 66 schema
 struct<CAST(CAST(922337203685477580700.0 AS DOUBLE) AS BIGINT):bigint>
 -- !query 66 output
-9223372036854775807
+NULL
 
 
 -- !query 67
@@ -730,7 +730,7 @@ SELECT string(int(shiftleft(bigint(-1), 63))+1)
 -- !query 72 schema
 struct<CAST((CAST(shiftleft(CAST(-1 AS BIGINT), 63) AS INT) + 1) AS STRING):string>
 -- !query 72 output
-1
+NULL
 
 
 -- !query 73

diff --git a/...atibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/...atibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -246,6 +246,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     // Limit clause without a ordering, which causes failure.
     "orc_predicate_pushdown",
 
+    // On casting a out-of-range value to a integral type, Hive returns the low-order bits, while
+    // Spark returns null.
+    "udf_to_byte",
+
     // Requires precision decimal support:
     "udf_when",
     "udf_case",
@@ -1086,7 +1090,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "udf_sum",
     "udf_tan",
     "udf_tinyint",
-    "udf_to_byte",
     "udf_to_date",
     "udf_to_double",
     "udf_to_float",