diff --git a/docs/sql-migration-guide-hive-compatibility.md b/docs/sql-migration-guide-hive-compatibility.md index f955e31d49a88..b2e98aa113ab1 100644 --- a/docs/sql-migration-guide-hive-compatibility.md +++ b/docs/sql-migration-guide-hive-compatibility.md @@ -165,3 +165,12 @@ Below are the scenarios in which Hive and Spark generate different results: * `SQRT(n)` If n < 0, Hive returns null, Spark SQL returns NaN. * `ACOS(n)` If n < -1 or n > 1, Hive returns null, Spark SQL returns NaN. * `ASIN(n)` If n < -1 or n > 1, Hive returns null, Spark SQL returns NaN. + +### Incompatible data type conversion + +For type conversion, if the value is out of the range of the target integral data type, Spark returns `null`, +while Hive returns the lower-order bits of the value. The related integral data types are: +* Byte +* Short +* Int +* Long diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index e9d99b66353e2..846e85d51c31e 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -23,6 +23,8 @@ license: | {:toc} ## Upgrading From Spark SQL 2.4 to 3.0 + - Since Spark 3.0, for type conversion, if the value is out of the range of the target `Long`/`Int`/`Short`/`Byte` data type, Spark returns `null`. In Spark version 2.4 and earlier, Spark returned the lower-order bits of the out-of-range value. For example, the result of `Cast(257, ByteType)` is now `null` instead of `1`. + - Since Spark 3.0, we reversed argument order of the trim function from `TRIM(trimStr, str)` to `TRIM(str, trimStr)` to be compatible with other databases. - Since Spark 3.0, PySpark requires a Pandas version of 0.23.2 or higher to use Pandas related functionality, such as `toPandas`, `createDataFrame` from Pandas DataFrame, etc. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 85bad74850dc1..02692c6eb0e5b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -180,13 +180,14 @@ object Cast { case (FloatType | DoubleType, TimestampType) => true case (TimestampType, DateType) => false + case (TimestampType, _: IntegralType) if to != LongType => true case (_, DateType) => true case (DateType, TimestampType) => false case (DateType, _) => true case (_, CalendarIntervalType) => true case (_, to: DecimalType) if !canNullSafeCastToDecimal(from, to) => true - case (_: FractionalType, _: IntegralType) => true // NaN, infinity + case (_: NumericType, _: IntegralType) if !legalNumericPrecedence(from, to) => true case _ => false } @@ -442,8 +443,38 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String buildCast[Int](_, d => null) case TimestampType => buildCast[Long](_, t => timestampToLong(t)) - case x: NumericType => - b => x.numeric.asInstanceOf[Numeric[Any]].toLong(b) + case ByteType => + b => b.asInstanceOf[Byte].toLong + case ShortType => + b => b.asInstanceOf[Short].toLong + case IntegerType => + b => b.asInstanceOf[Int].toLong + case FloatType => + buildCast[Float](_, f => + if (f <= Long.MaxValue && f >= Long.MinValue) { + f.toLong + } else { + null + } + ) + case DoubleType => + buildCast[Double](_, d => + if (d <= Long.MaxValue && d >= Long.MinValue) { + d.toLong + } else { + null + } + ) + case _: DecimalType => + val longMaxValueAsDecimal = Decimal(Long.MaxValue) + val longMinValueAsDecimal = Decimal(Long.MinValue) + buildCast[Decimal](_, d => + if (d <= longMaxValueAsDecimal && d >= longMinValueAsDecimal) { + d.toLong + } else { + null + } + ) } // IntConverter @@ -456,9 +487,54 @@ case class 
Cast(child: Expression, dataType: DataType, timeZoneId: Option[String case DateType => buildCast[Int](_, d => null) case TimestampType => - buildCast[Long](_, t => timestampToLong(t).toInt) - case x: NumericType => - b => x.numeric.asInstanceOf[Numeric[Any]].toInt(b) + buildCast[Long](_, t => { + val longValue = timestampToLong(t) + if (longValue == longValue.toInt) { + longValue.toInt + } else { + null + } + }) + case ByteType => + b => b.asInstanceOf[Byte].toInt + case ShortType => + b => b.asInstanceOf[Short].toInt + case LongType => + buildCast[Long](_, l => + if (l == l.toInt) { + l.toInt + } else { + null + } + ) + case FloatType => + buildCast[Float](_, f => + if (f <= Int.MaxValue && f >= Int.MinValue) { + f.toInt + } else { + null + } + ) + case DoubleType => + val upperBound = Int.MaxValue + 1L + val lowerBound = Int.MinValue - 1L + buildCast[Double](_, d => + if (d < upperBound && d > lowerBound) { + d.toInt + } else { + null + } + ) + case _: DecimalType => + val upperBound = Decimal(Int.MaxValue + 1L) + val lowerBound = Decimal(Int.MinValue - 1L) + buildCast[Decimal](_, d => + if (d < upperBound && d > lowerBound) { + d.toInt + } else { + null + } + ) } // ShortConverter @@ -475,9 +551,62 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String case DateType => buildCast[Int](_, d => null) case TimestampType => - buildCast[Long](_, t => timestampToLong(t).toShort) - case x: NumericType => - b => x.numeric.asInstanceOf[Numeric[Any]].toInt(b).toShort + buildCast[Long](_, t => { + val longValue = timestampToLong(t) + if (longValue == longValue.toShort) { + longValue.toShort + } else { + null + } + }) + case ByteType => + b => b.asInstanceOf[Byte].toShort + case IntegerType => + buildCast[Int](_, i => { + if (i == i.toShort) { + i.toShort + } else { + null + } + }) + case LongType => + buildCast[Long](_, i => { + if (i == i.toShort) { + i.toShort + } else { + null + } + }) + case FloatType => + val upperBound = Short.MaxValue + 1 
+ val lowerBound = Short.MinValue - 1 + buildCast[Float](_, f => + if (f < upperBound && f > lowerBound) { + f.toShort + } else { + null + } + ) + case DoubleType => + val upperBound = Short.MaxValue + 1 + val lowerBound = Short.MinValue - 1 + buildCast[Double](_, d => + if (d < upperBound && d > lowerBound) { + d.toShort + } else { + null + } + ) + case _: DecimalType => + val upperBound = Decimal(Short.MaxValue + 1) + val lowerBound = Decimal(Short.MinValue - 1) + buildCast[Decimal](_, d => + if (d < upperBound && d > lowerBound) { + d.toShort + } else { + null + } + ) } // ByteConverter @@ -494,9 +623,68 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String case DateType => buildCast[Int](_, d => null) case TimestampType => - buildCast[Long](_, t => timestampToLong(t).toByte) - case x: NumericType => - b => x.numeric.asInstanceOf[Numeric[Any]].toInt(b).toByte + buildCast[Long](_, t => { + val longValue = timestampToLong(t) + if (longValue == longValue.toByte) { + longValue.toByte + } else { + null + } + }) + case ShortType => + buildCast[Short](_, i => { + if (i == i.toByte) { + i.toByte + } else { + null + } + }) + case IntegerType => + buildCast[Int](_, i => { + if (i == i.toByte) { + i.toByte + } else { + null + } + }) + case LongType => + buildCast[Long](_, i => { + if (i == i.toByte) { + i.toByte + } else { + null + } + }) + case FloatType => + val upperBound = Byte.MaxValue + 1 + val lowerBound = Byte.MinValue - 1 + buildCast[Float](_, f => + if (f < upperBound && f > lowerBound) { + f.toByte + } else { + null + } + ) + case DoubleType => + val upperBound = Byte.MaxValue + 1 + val lowerBound = Byte.MinValue - 1 + buildCast[Double](_, d => + if (d < upperBound && d > lowerBound) { + d.toByte + } else { + null + } + ) + case _: DecimalType => + val upperBound = Decimal(Byte.MaxValue + 1) + val lowerBound = Decimal(Byte.MinValue - 1) + buildCast[Decimal](_, d => + if (d < upperBound && d > lowerBound) { + d.toByte + } else { + 
null + } + ) } /** @@ -1150,11 +1338,45 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String case DateType => (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => - (c, evPrim, evNull) => code"$evPrim = (byte) ${timestampToIntegerCode(c)};" + val longValue = ctx.freshName("longValue") + (c, evPrim, evNull) => + code""" + long $longValue = ${timestampToIntegerCode(c)}; + if ($longValue == (byte) $longValue) { + $evPrim = (byte) $longValue; + } else { + $evNull = true; + } + """ case DecimalType() => - (c, evPrim, evNull) => code"$evPrim = $c.toByte();" - case x: NumericType => - (c, evPrim, evNull) => code"$evPrim = (byte) $c;" + val floatValue = ctx.freshName("floatValue") + (c, evPrim, evNull) => + code""" + float $floatValue = $c.toFloat(); + if ($floatValue < ${Byte.MaxValue + 1} && $floatValue > ${Byte.MinValue - 1}) { + $evPrim = $c.toByte(); + } else { + $evNull = true; + } + """ + case _: ShortType | _: IntegerType | _: LongType => + (c, evPrim, evNull) => + code""" + if ($c == (byte) $c) { + $evPrim = (byte) $c; + } else { + $evNull = true; + } + """ + case _: FloatType | _: DoubleType => + (c, evPrim, evNull) => + code""" + if ($c > ${Byte.MinValue - 1} && $c < ${Byte.MaxValue + 1}) { + $evPrim = (byte) $c; + } else { + $evNull = true; + } + """ } private[this] def castToShortCode( @@ -1177,11 +1399,47 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String case DateType => (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => - (c, evPrim, evNull) => code"$evPrim = (short) ${timestampToIntegerCode(c)};" + val longValue = ctx.freshName("longValue") + (c, evPrim, evNull) => + code""" + long $longValue = ${timestampToIntegerCode(c)}; + if ($longValue == (short) $longValue) { + $evPrim = (short) $longValue; + } else { + $evNull = true; + } + """ case DecimalType() => - (c, evPrim, evNull) => code"$evPrim = $c.toShort();" - case x: NumericType => - (c, evPrim, evNull) => 
code"$evPrim = (short) $c;" + val floatValue = ctx.freshName("floatValue") + (c, evPrim, evNull) => + code""" + float $floatValue = $c.toFloat(); + if ($floatValue < ${Short.MaxValue + 1} && $floatValue > ${Short.MinValue - 1}) { + $evPrim = $c.toShort(); + } else { + $evNull = true; + } + """ + case _: ByteType => + (c, evPrim, evNull) => code"$evPrim = $c;" + case _: IntegerType | _: LongType => + (c, evPrim, evNull) => + code""" + if ($c == (short) $c) { + $evPrim = (short) $c; + } else { + $evNull = true; + } + """ + case _: FloatType | _: DoubleType => + (c, evPrim, evNull) => + code""" + if ($c > ${Short.MinValue - 1} && $c < ${Short.MaxValue + 1}) { + $evPrim = (short) $c; + } else { + $evNull = true; + } + """ } private[this] def castToIntCode(from: DataType, ctx: CodegenContext): CastFunction = from match { @@ -1202,11 +1460,56 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String case DateType => (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => - (c, evPrim, evNull) => code"$evPrim = (int) ${timestampToIntegerCode(c)};" + val longValue = ctx.freshName("longValue") + (c, evPrim, evNull) => + code""" + long $longValue = ${timestampToIntegerCode(c)}; + if ($longValue == (int) $longValue) { + $evPrim = (int) $longValue; + } else { + $evNull = true; + } + """ case DecimalType() => - (c, evPrim, evNull) => code"$evPrim = $c.toInt();" - case x: NumericType => - (c, evPrim, evNull) => code"$evPrim = (int) $c;" + val doubleValue = ctx.freshName("doubleValue") + (c, evPrim, evNull) => + code""" + double $doubleValue = $c.toDouble(); + if ($doubleValue > ${Int.MinValue - 1L}L && $doubleValue < ${Int.MaxValue + 1L}L) { + $evPrim = $c.toInt(); + } else { + $evNull = true; + } + """ + case _: ByteType | _: ShortType | _: IntegerType => + (c, evPrim, evNull) => code"$evPrim = $c;" + case _: LongType => + (c, evPrim, evNull) => + code""" + if ($c == (int) $c) { + $evPrim = (int) $c; + } else { + $evNull = true; + } + """ + 
case _: FloatType => + (c, evPrim, evNull) => + code""" + if ($c >= ${Int.MinValue} && $c <= ${Int.MaxValue}) { + $evPrim = (int) $c; + } else { + $evNull = true; + } + """ + case _: DoubleType => + (c, evPrim, evNull) => + code""" + if ($c > ${Int.MinValue - 1L}L && $c < ${Int.MaxValue + 1L}L) { + $evPrim = (int) $c; + } else { + $evNull = true; + } + """ } private[this] def castToLongCode(from: DataType, ctx: CodegenContext): CastFunction = from match { @@ -1230,9 +1533,25 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String case TimestampType => (c, evPrim, evNull) => code"$evPrim = (long) ${timestampToIntegerCode(c)};" case DecimalType() => - (c, evPrim, evNull) => code"$evPrim = $c.toLong();" - case x: NumericType => - (c, evPrim, evNull) => code"$evPrim = (long) $c;" + (c, evPrim, evNull) => + code""" + try { + $evPrim = $c.toJavaBigInteger().longValueExact(); + } catch (ArithmeticException e) { + $evNull = true; + } + """ + case _: ByteType | _: ShortType | _: IntegerType | _: LongType => + (c, evPrim, evNull) => code"$evPrim = $c;" + case _: FloatType | _: DoubleType => + (c, evPrim, evNull) => + code""" + if ($c >= ${Long.MinValue}L && $c <= ${Long.MaxValue}L) { + $evPrim = (long) $c; + } else { + $evNull = true; + } + """ } private[this] def castToFloatCode(from: DataType): CastFunction = from match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala index c3b79900d308d..24d61d102d6e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala @@ -224,7 +224,8 @@ object Block { } else { args.foreach { case _: ExprValue | _: Inline | _: Block => - case _: Boolean | _: Int | _: Long | _: Float | _: Double | _: String => + case _: 
Boolean | _: Byte | _: Short| _: Int | _: Long | _: Float | _: Double | + _: String => case other => throw new IllegalArgumentException( s"Can not interpolate ${other.getClass.getName} into code block.") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala index aa5eda8e5ba87..0b62cf527922d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala @@ -285,7 +285,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest { GroupingSets(Seq(Seq(), Seq(unresolved_a), Seq(unresolved_a, unresolved_b)), Seq(unresolved_a, unresolved_b), r1, Seq(unresolved_a, unresolved_b))) val expected = Project(Seq(a, b), Sort( - Seq(SortOrder('aggOrder.byte.withNullability(false), Ascending)), true, + Seq(SortOrder('aggOrder.byte.withNullability(true), Ascending)), true, Aggregate(Seq(a, b, gid), Seq(a, b, grouping_a.as("aggOrder")), Expand( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 44825c79781d9..ff5cd6e01a825 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1023,4 +1023,103 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(ret, InternalRow(null)) } } + + private def testIntMaxAndMin(dt: DataType): Unit = { + Seq(Int.MaxValue + 1L, Int.MinValue - 1L).foreach { value => + checkEvaluation(cast(value, dt), null) + checkEvaluation(cast(value.toString, dt), null) + checkEvaluation(cast(Decimal(value.toString), dt), null) + 
checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), dt), null) + checkEvaluation(cast(Literal(value * 2.0f, FloatType), dt), null) + checkEvaluation(cast(Literal(value * 1.0, DoubleType), dt), null) + } + } + + private def testLongMaxAndMin(dt: DataType): Unit = { + Seq(Decimal(Long.MaxValue) + Decimal(1), Decimal(Long.MinValue) - Decimal(1)).foreach { value => + checkEvaluation(cast(value.toString, dt), null) + checkEvaluation(cast(value, dt), null) + checkEvaluation(cast((value * Decimal(1.1)).toFloat, dt), null) + checkEvaluation(cast((value * Decimal(1.1)).toDouble, dt), null) + } + } + + test("Cast to byte") { + testIntMaxAndMin(ByteType) + Seq(Byte.MaxValue + 1, Byte.MinValue - 1).foreach { value => + checkEvaluation(cast(value, ByteType), null) + checkEvaluation(cast(value.toString, ByteType), null) + checkEvaluation(cast(Decimal(value.toString), ByteType), null) + checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ByteType), null) + checkEvaluation(cast(Literal(value, DateType), ByteType), null) + checkEvaluation(cast(Literal(value * 1.0f, FloatType), ByteType), null) + checkEvaluation(cast(Literal(value * 1.0, DoubleType), ByteType), null) + } + + Seq(Byte.MaxValue, 0.toByte, Byte.MinValue).foreach { value => + checkEvaluation(cast(value, ByteType), value) + checkEvaluation(cast(value.toString, ByteType), value) + checkEvaluation(cast(Decimal(value.toString), ByteType), value) + checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ByteType), value) + checkEvaluation(cast(Literal(value.toInt, DateType), ByteType), null) + checkEvaluation(cast(Literal(value * 1.0f, FloatType), ByteType), value) + checkEvaluation(cast(Literal(value * 1.0, DoubleType), ByteType), value) + } + } + + test("Cast to short") { + testIntMaxAndMin(ShortType) + Seq(Short.MaxValue + 1, Short.MinValue - 1).foreach { value => + checkEvaluation(cast(value, ShortType), null) + checkEvaluation(cast(value.toString, ShortType), null) 
+ checkEvaluation(cast(Decimal(value.toString), ShortType), null) + checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ShortType), null) + checkEvaluation(cast(Literal(value, DateType), ShortType), null) + checkEvaluation(cast(Literal(value * 1.0f, FloatType), ShortType), null) + checkEvaluation(cast(Literal(value * 1.0, DoubleType), ShortType), null) + } + + Seq(Short.MaxValue, 0.toShort, Short.MinValue).foreach { value => + checkEvaluation(cast(value, ShortType), value) + checkEvaluation(cast(value.toString, ShortType), value) + checkEvaluation(cast(Decimal(value.toString), ShortType), value) + checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ShortType), value) + checkEvaluation(cast(Literal(value.toInt, DateType), ShortType), null) + checkEvaluation(cast(Literal(value * 1.0f, FloatType), ShortType), value) + checkEvaluation(cast(Literal(value * 1.0, DoubleType), ShortType), value) + } + } + + test("Cast to int") { + testIntMaxAndMin(IntegerType) + testLongMaxAndMin(IntegerType) + + Seq(Int.MaxValue, 0, Int.MinValue).foreach { value => + checkEvaluation(cast(value, IntegerType), value) + checkEvaluation(cast(value.toString, IntegerType), value) + checkEvaluation(cast(Decimal(value.toString), IntegerType), value) + checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), IntegerType), value) + checkEvaluation(cast(Literal(value * 1.0, DoubleType), IntegerType), value) + } + checkEvaluation(cast(2147483647.4f, IntegerType), 2147483647) + checkEvaluation(cast(-2147483648.4f, IntegerType), -2147483648) + checkEvaluation(cast(2147483647.4D, IntegerType), 2147483647) + checkEvaluation(cast(-2147483648.4D, IntegerType), -2147483648) + } + + test("Cast to long") { + testLongMaxAndMin(LongType) + + Seq(Long.MaxValue, 0, Long.MinValue).foreach { value => + checkEvaluation(cast(value, LongType), value) + checkEvaluation(cast(value.toString, LongType), value) + checkEvaluation(cast(Decimal(value.toString), LongType), 
value) + checkEvaluation(cast(Literal(value, TimestampType), LongType), + Math.floorDiv(value, MICROS_PER_SECOND)) + } + checkEvaluation(cast(9223372036854775807.4f, LongType), 9223372036854775807L) + checkEvaluation(cast(-9223372036854775808.4f, LongType), -9223372036854775808L) + checkEvaluation(cast(9223372036854775807.4D, LongType), 9223372036854775807L) + checkEvaluation(cast(-9223372036854775808.4D, LongType), -9223372036854775808L) + } } diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/float4.sql.out index f25b7f5911aeb..bc8d493a5864e 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/float4.sql.out @@ -336,7 +336,7 @@ SELECT int(float('-2147483900')) -- !query 37 schema struct -- !query 37 output --2147483648 +NULL -- !query 38 @@ -368,7 +368,7 @@ SELECT bigint(float('-9223380000000000000')) -- !query 41 schema struct -- !query 41 output --9223372036854775808 +NULL -- !query 42 diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/float8.sql.out index 3e3f24d603ff0..60fb9d65302b8 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/float8.sql.out @@ -828,7 +828,7 @@ SELECT bigint(double('-9223372036854780000')) -- !query 93 schema struct -- !query 93 output --9223372036854775808 +NULL -- !query 94 diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/int8.sql.out index 6d7fae19aa7e4..0c0f27ecd0bd5 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/int8.sql.out @@ -606,10 +606,10 @@ SELECT CAST(q1 AS int) FROM int8_tbl WHERE q2 <> 456 -- !query 60 schema struct -- 
!query 60 output --869367531 --869367531 --869367531 123 +NULL +NULL +NULL -- !query 61 @@ -625,10 +625,10 @@ SELECT CAST(q1 AS smallint) FROM int8_tbl WHERE q2 <> 456 -- !query 62 schema struct -- !query 62 output --32491 --32491 --32491 123 +NULL +NULL +NULL -- !query 63 @@ -664,7 +664,7 @@ SELECT CAST(double('922337203685477580700.0') AS bigint) -- !query 66 schema struct -- !query 66 output -9223372036854775807 +NULL -- !query 67 @@ -730,7 +730,7 @@ SELECT string(int(shiftleft(bigint(-1), 63))+1) -- !query 72 schema struct -- !query 72 output -1 +NULL -- !query 73 diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index cebaad5b4ad9b..8eec6227a2a26 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -246,6 +246,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Limit clause without a ordering, which causes failure. "orc_predicate_pushdown", + // On casting an out-of-range value to an integral type, Hive returns the low-order bits, while + // Spark returns null. + "udf_to_byte", + // Requires precision decimal support: "udf_when", "udf_case", @@ -1086,7 +1090,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_sum", "udf_tan", "udf_tinyint", - "udf_to_byte", "udf_to_date", "udf_to_double", "udf_to_float",