Changes from 8 commits
@@ -224,7 +224,8 @@ object Block {
    } else {
      args.foreach {
        case _: ExprValue | _: Inline | _: Block =>
-       case _: Boolean | _: Int | _: Long | _: Float | _: Double | _: String =>
+       case _: Boolean | _: Byte | _: Short | _: Int | _: Long | _: Float | _: Double |
+         _: String =>
        case other => throw new IllegalArgumentException(
          s"Can not interpolate ${other.getClass.getName} into code block.")
      }
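For context, a hedged sketch of what the widened match now permits: interpolating `Byte` and `Short` arguments into a generated-code block (usage is illustrative; it assumes the `code` interpolator is the one validated by this match):

```scala
// Byte and Short values may now be interpolated directly; previously they
// fell through to the IllegalArgumentException branch.
val b: Byte = 1
val s: Short = 2
val block = code"byte flag = $b; short count = $s;"
```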
@@ -285,7 +285,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest {
    GroupingSets(Seq(Seq(), Seq(unresolved_a), Seq(unresolved_a, unresolved_b)),
      Seq(unresolved_a, unresolved_b), r1, Seq(unresolved_a, unresolved_b)))
    val expected = Project(Seq(a, b), Sort(
-     Seq(SortOrder('aggOrder.byte.withNullability(false), Ascending)), true,
+     Seq(SortOrder('aggOrder.byte.withNullability(true), Ascending)), true,
Member: Why did you change this? Did you hit some test failures?

Member Author: In line 38,

lazy val grouping_a = Cast(ShiftRight(gid, 1) & 1, ByteType, Option(TimeZone.getDefault().getID))

the grouping expression casts an integer to a byte, so the nullability is true.
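For intuition, a minimal sketch of that reasoning (plain Scala, not Spark's Cast implementation; names are illustrative):

```scala
// An Int -> Byte cast can overflow, and overflow now yields null, so the
// cast is statically nullable even though ShiftRight(gid, 1) & 1 only ever
// produces 0 or 1 at runtime.
def intToByteOrNull(i: Int): Option[Byte] =
  if (i >= Byte.MinValue && i <= Byte.MaxValue) Some(i.toByte) else None

assert(intToByteOrNull(1) == Some(1.toByte))
assert(intToByteOrNull(300).isEmpty) // overflow => null
```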

      Aggregate(Seq(a, b, gid),
        Seq(a, b, grouping_a.as("aggOrder")),
        Expand(
@@ -1023,4 +1023,103 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
      checkEvaluation(ret, InternalRow(null))
    }
  }

  private def testIntMaxAndMin(dt: DataType): Unit = {
    Seq(Int.MaxValue + 1L, Int.MinValue - 1L).foreach { value =>
      checkEvaluation(cast(value, dt), null)
      checkEvaluation(cast(value.toString, dt), null)
      checkEvaluation(cast(Decimal(value.toString), dt), null)
      checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), dt), null)
      checkEvaluation(cast(Literal(value * 2.0f, FloatType), dt), null)
      checkEvaluation(cast(Literal(value * 1.0, DoubleType), dt), null)
    }
  }

  private def testLongMaxAndMin(dt: DataType): Unit = {
    Seq(Decimal(Long.MaxValue) + Decimal(1), Decimal(Long.MinValue) - Decimal(1)).foreach { value =>
      checkEvaluation(cast(value.toString, dt), null)
      checkEvaluation(cast(value, dt), null)
      checkEvaluation(cast((value * Decimal(1.1)).toFloat, dt), null)
      checkEvaluation(cast((value * Decimal(1.1)).toDouble, dt), null)
    }
  }

test("Cast to byte") {
testIntMaxAndMin(ByteType)
Seq(Byte.MaxValue + 1, Byte.MinValue - 1).foreach { value =>
checkEvaluation(cast(value, ByteType), null)
checkEvaluation(cast(value.toString, ByteType), null)
checkEvaluation(cast(Decimal(value.toString), ByteType), null)
checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ByteType), null)
checkEvaluation(cast(Literal(value, DateType), ByteType), null)
checkEvaluation(cast(Literal(value * 1.0f, FloatType), ByteType), null)
checkEvaluation(cast(Literal(value * 1.0, DoubleType), ByteType), null)
}

Seq(Byte.MaxValue, 0.toByte, Byte.MinValue).foreach { value =>
checkEvaluation(cast(value, ByteType), value)
checkEvaluation(cast(value.toString, ByteType), value)
checkEvaluation(cast(Decimal(value.toString), ByteType), value)
checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ByteType), value)
checkEvaluation(cast(Literal(value.toInt, DateType), ByteType), null)
checkEvaluation(cast(Literal(value * 1.0f, FloatType), ByteType), value)
checkEvaluation(cast(Literal(value * 1.0, DoubleType), ByteType), value)
}
}

test("Cast to short") {
testIntMaxAndMin(ShortType)
Seq(Short.MaxValue + 1, Short.MinValue - 1).foreach { value =>
checkEvaluation(cast(value, ShortType), null)
checkEvaluation(cast(value.toString, ShortType), null)
checkEvaluation(cast(Decimal(value.toString), ShortType), null)
checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ShortType), null)
checkEvaluation(cast(Literal(value, DateType), ShortType), null)
checkEvaluation(cast(Literal(value * 1.0f, FloatType), ShortType), null)
checkEvaluation(cast(Literal(value * 1.0, DoubleType), ShortType), null)
}

Seq(Short.MaxValue, 0.toShort, Short.MinValue).foreach { value =>
checkEvaluation(cast(value, ShortType), value)
checkEvaluation(cast(value.toString, ShortType), value)
checkEvaluation(cast(Decimal(value.toString), ShortType), value)
checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ShortType), value)
checkEvaluation(cast(Literal(value.toInt, DateType), ShortType), null)
checkEvaluation(cast(Literal(value * 1.0f, FloatType), ShortType), value)
checkEvaluation(cast(Literal(value * 1.0, DoubleType), ShortType), value)
}
}

test("Cast to int") {
testIntMaxAndMin(IntegerType)
testLongMaxAndMin(IntegerType)

Seq(Int.MaxValue, 0, Int.MinValue).foreach { value =>
checkEvaluation(cast(value, IntegerType), value)
checkEvaluation(cast(value.toString, IntegerType), value)
checkEvaluation(cast(Decimal(value.toString), IntegerType), value)
checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), IntegerType), value)
checkEvaluation(cast(Literal(value * 1.0, DoubleType), IntegerType), value)
}
checkEvaluation(cast(2147483647.4f, IntegerType), 2147483647)
maropu (Member) commented on Aug 2, 2019:

In this test, did you intend to check this case: https://github.com/apache/spark/pull/25300/files#r309043580? If so, how about describing what is tested in a comment? Also, can you add boundary tests for null, e.g., checkEvaluation(cast(214748364?.?f, IntegerType), null)?
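A possible shape for those boundary tests (values illustrative, since the literals in the suggestion are elided with `?`; Double literals are used to sidestep Float rounding near 2^31, and the null results are assumed from the new overflow-to-null semantics):

```scala
// Just outside the Int range, so the new cast semantics should yield null.
checkEvaluation(cast(2147483648.4D, IntegerType), null)
checkEvaluation(cast(-2147483649.4D, IntegerType), null)
```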

Member: Is it better to use ????.9 as the fraction value, to show the cutoff (truncation) semantics?
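For reference, a minimal sketch of the truncate-then-range-check semantics these fraction values probe (an assumed helper, not Spark's actual Cast implementation):

```scala
// Fractions are cut off toward zero, then out-of-range results become null.
def doubleToIntOrNull(d: Double): Option[Int] =
  if (math.floor(d) <= Int.MaxValue && math.ceil(d) >= Int.MinValue) Some(d.toInt)
  else None

assert(doubleToIntOrNull(2147483647.4) == Some(2147483647))    // fits after truncation
assert(doubleToIntOrNull(-2147483648.4) == Some(-2147483648))  // truncation is toward zero
assert(doubleToIntOrNull(2147483648.4).isEmpty)                // out of range => null
```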

    checkEvaluation(cast(-2147483648.4f, IntegerType), -2147483648)
    checkEvaluation(cast(2147483647.4D, IntegerType), 2147483647)
    checkEvaluation(cast(-2147483648.4D, IntegerType), -2147483648)
  }

  test("Cast to long") {
    testLongMaxAndMin(LongType)

    Seq(Long.MaxValue, 0, Long.MinValue).foreach { value =>
      checkEvaluation(cast(value, LongType), value)
      checkEvaluation(cast(value.toString, LongType), value)
      checkEvaluation(cast(Decimal(value.toString), LongType), value)
      checkEvaluation(cast(Literal(value, TimestampType), LongType),
        Math.floorDiv(value, MICROS_PER_SECOND))
    }
    checkEvaluation(cast(9223372036854775807.4f, LongType), 9223372036854775807L)
    checkEvaluation(cast(-9223372036854775808.4f, LongType), -9223372036854775808L)
    checkEvaluation(cast(9223372036854775807.4D, LongType), 9223372036854775807L)
    checkEvaluation(cast(-9223372036854775808.4D, LongType), -9223372036854775808L)
  }
}
@@ -336,7 +336,7 @@ SELECT int(float('-2147483900'))
-- !query 37 schema
struct<CAST(CAST(-2147483900 AS FLOAT) AS INT):int>
-- !query 37 output
- -2147483648
+ NULL


-- !query 38
@@ -368,7 +368,7 @@ SELECT bigint(float('-9223380000000000000'))
-- !query 41 schema
struct<CAST(CAST(-9223380000000000000 AS FLOAT) AS BIGINT):bigint>
-- !query 41 output
- -9223372036854775808
+ NULL


-- !query 42
@@ -828,7 +828,7 @@ SELECT bigint(double('-9223372036854780000'))
-- !query 93 schema
struct<CAST(CAST(-9223372036854780000 AS DOUBLE) AS BIGINT):bigint>
-- !query 93 output
- -9223372036854775808
+ NULL


-- !query 94
sql/core/src/test/resources/sql-tests/results/pgSQL/int8.sql.out (8 additions, 8 deletions)
@@ -606,10 +606,10 @@ SELECT CAST(q1 AS int) FROM int8_tbl WHERE q2 <> 456
-- !query 60 schema
struct<q1:int>
-- !query 60 output
- -869367531
- -869367531
- -869367531
123
+ NULL
+ NULL
+ NULL


-- !query 61
@@ -625,10 +625,10 @@ SELECT CAST(q1 AS smallint) FROM int8_tbl WHERE q2 <> 456
-- !query 62 schema
struct<q1:smallint>
-- !query 62 output
- -32491
- -32491
- -32491
123
+ NULL
+ NULL
+ NULL


-- !query 63
@@ -664,7 +664,7 @@ SELECT CAST(double('922337203685477580700.0') AS bigint)
-- !query 66 schema
struct<CAST(CAST(922337203685477580700.0 AS DOUBLE) AS BIGINT):bigint>
-- !query 66 output
- 9223372036854775807
+ NULL


-- !query 67
@@ -730,7 +730,7 @@ SELECT string(int(shiftleft(bigint(-1), 63))+1)
-- !query 72 schema
struct<CAST((CAST(shiftleft(CAST(-1 AS BIGINT), 63) AS INT) + 1) AS STRING):string>
-- !query 72 output
- 1
+ NULL


-- !query 73
@@ -246,6 +246,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
    // Limit clause without an ordering, which causes failure.
    "orc_predicate_pushdown",

+   // On casting an out-of-range value to an integral type, Hive returns the low-order bits,
+   // while Spark returns null.
+   "udf_to_byte",

    // Requires precision decimal support:
    "udf_when",
    "udf_case",
@@ -1086,7 +1090,6 @@
"udf_sum",
"udf_tan",
"udf_tinyint",
"udf_to_byte",
Member Author: This is within expectation. There are two cases in udf_to_byte that fail after this PR:

SELECT CAST(-129 AS TINYINT) FROM src tablesample (1 rows);
SELECT CAST(CAST(-1025 AS BIGINT) AS TINYINT) FROM src tablesample (1 rows);

Spark now returns null for both, while Hive's results are not null. I will update the migration guide if this looks OK to you.
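To make the behavioral difference concrete, a minimal sketch (plain Scala, not Spark or Hive code) contrasting the two overflow semantics for the queries above:

```scala
// Hive-style: keep the low-order bits (two's-complement truncation).
def hiveStyleToByte(v: Long): Byte = v.toByte

// Spark-style after this PR: out-of-range values become null.
def sparkStyleToByte(v: Long): Option[Byte] =
  if (v >= Byte.MinValue && v <= Byte.MaxValue) Some(v.toByte) else None

assert(hiveStyleToByte(-129L) == 127.toByte)   // CAST(-129 AS TINYINT) in Hive
assert(sparkStyleToByte(-129L).isEmpty)        // null in Spark
assert(hiveStyleToByte(-1025L) == (-1).toByte) // CAST(-1025 AS TINYINT) in Hive
assert(sparkStyleToByte(-1025L).isEmpty)       // null in Spark
```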

"udf_to_date",
"udf_to_double",
"udf_to_float",
Expand Down