Skip to content

Commit 1017327

Browse files
hvanhovell authored and rxin committed
[SPARK-12848][SQL] Change parsed decimal literal datatype from Double to Decimal
The current parser turns a decimal literal, for example ```12.1```, into a Double. The problem with this approach is that we convert an exact literal into a non-exact ```Double```. The PR changes this behavior, a Decimal literal is now converted into an exact ```BigDecimal```. The behavior for scientific decimals, for example ```12.1e01```, is unchanged. This will be converted into a Double. This PR replaces the ```BigDecimal``` literal by a ```Double``` literal, because the ```BigDecimal``` is the default now. You can use the double literal by appending a 'D' to the value, for instance: ```3.141527D``` cc davies rxin Author: Herman van Hovell <[email protected]> Closes #10796 from hvanhovell/SPARK-12848.
1 parent f3934a8 commit 1017327

28 files changed

Lines changed: 83 additions & 59 deletions

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1812,7 +1812,7 @@ test_that("Method coltypes() to get and set R's data types of a DataFrame", {
18121812
expect_equal(coltypes(x), "map<string,string>")
18131813

18141814
df <- selectExpr(read.json(sqlContext, jsonPath), "name", "(age * 1.21) as age")
1815-
expect_equal(dtypes(df), list(c("name", "string"), c("age", "double")))
1815+
expect_equal(dtypes(df), list(c("name", "string"), c("age", "decimal(24,2)")))
18161816

18171817
df1 <- select(df, cast(df$age, "integer"))
18181818
coltypes(df) <- c("character", "integer")

sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ constant
122122
| BigintLiteral
123123
| SmallintLiteral
124124
| TinyintLiteral
125-
| DecimalLiteral
125+
| DoubleLiteral
126126
| booleanValue
127127
;
128128

sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -418,9 +418,9 @@ TinyintLiteral
418418
(Digit)+ 'Y'
419419
;
420420

421-
DecimalLiteral
421+
DoubleLiteral
422422
:
423-
Number 'B' 'D'
423+
Number 'D'
424424
;
425425

426426
ByteLengthLiteral

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
623623
val CASE = "(?i)CASE".r
624624

625625
val INTEGRAL = "[+-]?\\d+".r
626+
val DECIMAL = "[+-]?((\\d+(\\.\\d*)?)|(\\.\\d+))".r
626627

627628
protected def nodeToExpr(node: ASTNode): Expression = node match {
628629
/* Attribute References */
@@ -785,8 +786,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
785786
case ast if ast.tokenType == SparkSqlParser.BigintLiteral =>
786787
Literal.create(ast.text.substring(0, ast.text.length() - 1).toLong, LongType)
787788

788-
case ast if ast.tokenType == SparkSqlParser.DecimalLiteral =>
789-
Literal(Decimal(ast.text.substring(0, ast.text.length() - 2)))
789+
case ast if ast.tokenType == SparkSqlParser.DoubleLiteral =>
790+
Literal(ast.text.toDouble)
790791

791792
case ast if ast.tokenType == SparkSqlParser.Number =>
792793
val text = ast.text
@@ -799,7 +800,10 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
799800
Literal(v.longValue())
800801
case v => Literal(v.underlying())
801802
}
803+
case DECIMAL(_*) =>
804+
Literal(BigDecimal(text).underlying())
802805
case _ =>
806+
// Convert a scientifically notated decimal into a double.
803807
Literal(text.toDouble)
804808
}
805809
case ast if ast.tokenType == SparkSqlParser.StringLiteral =>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -692,12 +692,11 @@ object HiveTypeCoercion {
692692
case e if !e.childrenResolved => e
693693
// Find tightest common type for If, if the true value and false value have different types.
694694
case i @ If(pred, left, right) if left.dataType != right.dataType =>
695-
findTightestCommonTypeToString(left.dataType, right.dataType).map { widestType =>
695+
findWiderTypeForTwo(left.dataType, right.dataType).map { widestType =>
696696
val newLeft = if (left.dataType == widestType) left else Cast(left, widestType)
697697
val newRight = if (right.dataType == widestType) right else Cast(right, widestType)
698698
If(pred, newLeft, newRight)
699699
}.getOrElse(i) // If there is no applicable conversion, leave expression unchanged.
700-
701700
// Convert If(null literal, _, _) into boolean type.
702701
// In the optimizer, we should short-circuit this directly into false value.
703702
case If(pred, left, right) if pred.dataType == NullType =>

sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ class MathExpressionsSuite extends QueryTest with SharedSQLContext {
212212
Seq(Row(5, 10, 0), Row(55, 60, 100), Row(555, 560, 600))
213213
)
214214

215-
val pi = "3.1415BD"
215+
val pi = "3.1415"
216216
checkAnswer(
217217
sql(s"SELECT round($pi, -3), round($pi, -2), round($pi, -1), " +
218218
s"round($pi, 0), round($pi, 1), round($pi, 2), round($pi, 3)"),
@@ -367,6 +367,16 @@ class MathExpressionsSuite extends QueryTest with SharedSQLContext {
367367
checkAnswer(
368368
input.toDF("key", "value").selectExpr("abs(key) a").sort("a"),
369369
input.map(pair => Row(pair._2)))
370+
371+
checkAnswer(
372+
sql("select abs(0), abs(-1), abs(123), abs(-9223372036854775807), abs(9223372036854775807)"),
373+
Row(0, 1, 123, 9223372036854775807L, 9223372036854775807L)
374+
)
375+
376+
checkAnswer(
377+
sql("select abs(0.0), abs(-3.14159265), abs(3.14159265)"),
378+
Row(BigDecimal("0.0"), BigDecimal("3.14159265"), BigDecimal("3.14159265"))
379+
)
370380
}
371381

372382
test("log2") {

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,19 +1174,19 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
11741174

11751175
test("Floating point number format") {
11761176
checkAnswer(
1177-
sql("SELECT 0.3"), Row(0.3)
1177+
sql("SELECT 0.3"), Row(BigDecimal(0.3))
11781178
)
11791179

11801180
checkAnswer(
1181-
sql("SELECT -0.8"), Row(-0.8)
1181+
sql("SELECT -0.8"), Row(BigDecimal(-0.8))
11821182
)
11831183

11841184
checkAnswer(
1185-
sql("SELECT .5"), Row(0.5)
1185+
sql("SELECT .5"), Row(BigDecimal(0.5))
11861186
)
11871187

11881188
checkAnswer(
1189-
sql("SELECT -.18"), Row(-0.18)
1189+
sql("SELECT -.18"), Row(BigDecimal(-0.18))
11901190
)
11911191
}
11921192

@@ -1200,11 +1200,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
12001200
)
12011201

12021202
checkAnswer(
1203-
sql("SELECT 9223372036854775808BD"), Row(new java.math.BigDecimal("9223372036854775808"))
1203+
sql("SELECT 9223372036854775808"), Row(new java.math.BigDecimal("9223372036854775808"))
12041204
)
12051205

12061206
checkAnswer(
1207-
sql("SELECT -9223372036854775809BD"), Row(new java.math.BigDecimal("-9223372036854775809"))
1207+
sql("SELECT -9223372036854775809"), Row(new java.math.BigDecimal("-9223372036854775809"))
12081208
)
12091209
}
12101210

@@ -1219,11 +1219,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
12191219
)
12201220

12211221
checkAnswer(
1222-
sql("SELECT -5.2BD"), Row(BigDecimal(-5.2))
1222+
sql("SELECT -5.2"), Row(BigDecimal(-5.2))
12231223
)
12241224

12251225
checkAnswer(
1226-
sql("SELECT +6.8"), Row(6.8d)
1226+
sql("SELECT +6.8e0"), Row(6.8d)
12271227
)
12281228

12291229
checkAnswer(
@@ -1598,20 +1598,20 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
15981598
}
15991599

16001600
test("decimal precision with multiply/division") {
1601-
checkAnswer(sql("select 10.3BD * 3.0BD"), Row(BigDecimal("30.90")))
1602-
checkAnswer(sql("select 10.3000BD * 3.0BD"), Row(BigDecimal("30.90000")))
1603-
checkAnswer(sql("select 10.30000BD * 30.0BD"), Row(BigDecimal("309.000000")))
1604-
checkAnswer(sql("select 10.300000000000000000BD * 3.000000000000000000BD"),
1601+
checkAnswer(sql("select 10.3 * 3.0"), Row(BigDecimal("30.90")))
1602+
checkAnswer(sql("select 10.3000 * 3.0"), Row(BigDecimal("30.90000")))
1603+
checkAnswer(sql("select 10.30000 * 30.0"), Row(BigDecimal("309.000000")))
1604+
checkAnswer(sql("select 10.300000000000000000 * 3.000000000000000000"),
16051605
Row(BigDecimal("30.900000000000000000000000000000000000", new MathContext(38))))
1606-
checkAnswer(sql("select 10.300000000000000000BD * 3.0000000000000000000BD"),
1606+
checkAnswer(sql("select 10.300000000000000000 * 3.0000000000000000000"),
16071607
Row(null))
16081608

1609-
checkAnswer(sql("select 10.3BD / 3.0BD"), Row(BigDecimal("3.433333")))
1610-
checkAnswer(sql("select 10.3000BD / 3.0BD"), Row(BigDecimal("3.4333333")))
1611-
checkAnswer(sql("select 10.30000BD / 30.0BD"), Row(BigDecimal("0.343333333")))
1612-
checkAnswer(sql("select 10.300000000000000000BD / 3.00000000000000000BD"),
1609+
checkAnswer(sql("select 10.3 / 3.0"), Row(BigDecimal("3.433333")))
1610+
checkAnswer(sql("select 10.3000 / 3.0"), Row(BigDecimal("3.4333333")))
1611+
checkAnswer(sql("select 10.30000 / 30.0"), Row(BigDecimal("0.343333333")))
1612+
checkAnswer(sql("select 10.300000000000000000 / 3.00000000000000000"),
16131613
Row(BigDecimal("3.433333333333333333333333333", new MathContext(38))))
1614-
checkAnswer(sql("select 10.3000000000000000000BD / 3.00000000000000000BD"),
1614+
checkAnswer(sql("select 10.3000000000000000000 / 3.00000000000000000"),
16151615
Row(BigDecimal("3.4333333333333333333333333333", new MathContext(38))))
16161616
}
16171617

@@ -1637,13 +1637,13 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
16371637
}
16381638

16391639
test("precision smaller than scale") {
1640-
checkAnswer(sql("select 10.00BD"), Row(BigDecimal("10.00")))
1641-
checkAnswer(sql("select 1.00BD"), Row(BigDecimal("1.00")))
1642-
checkAnswer(sql("select 0.10BD"), Row(BigDecimal("0.10")))
1643-
checkAnswer(sql("select 0.01BD"), Row(BigDecimal("0.01")))
1644-
checkAnswer(sql("select 0.001BD"), Row(BigDecimal("0.001")))
1645-
checkAnswer(sql("select -0.01BD"), Row(BigDecimal("-0.01")))
1646-
checkAnswer(sql("select -0.001BD"), Row(BigDecimal("-0.001")))
1640+
checkAnswer(sql("select 10.00"), Row(BigDecimal("10.00")))
1641+
checkAnswer(sql("select 1.00"), Row(BigDecimal("1.00")))
1642+
checkAnswer(sql("select 0.10"), Row(BigDecimal("0.10")))
1643+
checkAnswer(sql("select 0.01"), Row(BigDecimal("0.01")))
1644+
checkAnswer(sql("select 0.001"), Row(BigDecimal("0.001")))
1645+
checkAnswer(sql("select -0.01"), Row(BigDecimal("-0.01")))
1646+
checkAnswer(sql("select -0.001"), Row(BigDecimal("-0.001")))
16471647
}
16481648

16491649
test("external sorting updates peak execution memory") {

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -442,13 +442,13 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
442442

443443
// Number and String conflict: resolve the type as number in this query.
444444
checkAnswer(
445-
sql("select num_str + 1.2BD from jsonTable where num_str > 14"),
445+
sql("select num_str + 1.2 from jsonTable where num_str > 14"),
446446
Row(BigDecimal("92233720368547758071.2"))
447447
)
448448

449449
// Number and String conflict: resolve the type as number in this query.
450450
checkAnswer(
451-
sql("select num_str + 1.2BD from jsonTable where num_str >= 92233720368547758060BD"),
451+
sql("select num_str + 1.2 from jsonTable where num_str >= 92233720368547758060"),
452452
Row(new java.math.BigDecimal("92233720368547758071.2"))
453453
)
454454

sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,14 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
323323
// Feature removed in HIVE-11145
324324
"alter_partition_protect_mode",
325325
"drop_partitions_ignore_protection",
326-
"protectmode"
326+
"protectmode",
327+
328+
// Spark parser treats numerical literals differently: it creates decimals instead of doubles.
329+
"udf_abs",
330+
"udf_format_number",
331+
"udf_round",
332+
"udf_round_3",
333+
"view_cast"
327334
)
328335

329336
/**
@@ -884,7 +891,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
884891
"udf_10_trims",
885892
"udf_E",
886893
"udf_PI",
887-
"udf_abs",
888894
"udf_acos",
889895
"udf_add",
890896
"udf_array",
@@ -928,7 +934,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
928934
"udf_find_in_set",
929935
"udf_float",
930936
"udf_floor",
931-
"udf_format_number",
932937
"udf_from_unixtime",
933938
"udf_greaterthan",
934939
"udf_greaterthanorequal",
@@ -976,8 +981,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
976981
"udf_regexp_replace",
977982
"udf_repeat",
978983
"udf_rlike",
979-
"udf_round",
980-
"udf_round_3",
981984
"udf_rpad",
982985
"udf_rtrim",
983986
"udf_sign",

sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,7 @@ class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfte
559559
"""
560560
|select p_mfgr,p_name, p_size,
561561
|histogram_numeric(p_retailprice, 5) over w1 as hist,
562-
|percentile(p_partkey, 0.5) over w1 as per,
562+
|percentile(p_partkey, cast(0.5 as double)) over w1 as per,
563563
|row_number() over(distribute by p_mfgr sort by p_name) as rn
564564
|from part
565565
|window w1 as (distribute by p_mfgr sort by p_mfgr, p_name

0 commit comments

Comments
 (0)