Skip to content

Commit 49606c8

Browse files
committed
Fix for SPARK-28152: [JDBC Connector] ShortType and FloatTypes are not mapped correctly for read/write of SQLServer Tables
ShortType and FloatTypes are not correctly mapped to the right JDBC types when using the JDBC connector. This results in tables and Spark data frames being created with unintended types. The issue was observed when validating against SQLServer. Some example issues: - A write from a df with a ShortType column results in a SQL table with the column typed as INTEGER as opposed to SMALLINT, thus a larger table than expected. - A read results in a dataframe with type INTEGER as opposed to ShortType. FloatTypes have an issue on the read path. In the write path the Spark data type 'FloatType' is correctly mapped to the JDBC equivalent data type 'Real'. But in the read path, when JDBC data types need to be converted to Catalyst data types (getCatalystType), 'Real' incorrectly gets mapped to 'DoubleType' rather than 'FloatType'. Post fix, ShortType is correctly mapped to SMALLINT and FloatType is mapped to REAL.
1 parent aa41dce commit 49606c8

3 files changed

Lines changed: 24 additions & 7 deletions

File tree

external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,24 +120,24 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite {
120120
assert(types.length == 12)
121121
assert(types(0).equals("class java.lang.Boolean"))
122122
assert(types(1).equals("class java.lang.Integer"))
123-
assert(types(2).equals("class java.lang.Integer"))
123+
assert(types(2).equals("class java.lang.Short"))
124124
assert(types(3).equals("class java.lang.Integer"))
125125
assert(types(4).equals("class java.lang.Long"))
126126
assert(types(5).equals("class java.lang.Double"))
127-
assert(types(6).equals("class java.lang.Double"))
128-
assert(types(7).equals("class java.lang.Double"))
127+
assert(types(6).equals("class java.lang.Float"))
128+
assert(types(7).equals("class java.lang.Float"))
129129
assert(types(8).equals("class java.math.BigDecimal"))
130130
assert(types(9).equals("class java.math.BigDecimal"))
131131
assert(types(10).equals("class java.math.BigDecimal"))
132132
assert(types(11).equals("class java.math.BigDecimal"))
133133
assert(row.getBoolean(0) == false)
134134
assert(row.getInt(1) == 255)
135-
assert(row.getInt(2) == 32767)
135+
assert(row.getShort(2) == 32767)
136136
assert(row.getInt(3) == 2147483647)
137137
assert(row.getLong(4) == 9223372036854775807L)
138138
assert(row.getDouble(5) == 1.2345678901234512E14) // float = float(53) has 15-digits precision
139-
assert(row.getDouble(6) == 1.23456788103168E14) // float(24) has 7-digits precision
140-
assert(row.getDouble(7) == 1.23456788103168E14) // real = float(24)
139+
assert(row.getFloat(6) == 1.23456788103168E14) // float(24) has 7-digits precision
140+
assert(row.getFloat(7) == 1.23456788103168E14) // real = float(24)
141141
assert(row.getAs[BigDecimal](8).equals(new BigDecimal("123.00")))
142142
assert(row.getAs[BigDecimal](9).equals(new BigDecimal("12345.12000")))
143143
assert(row.getAs[BigDecimal](10).equals(new BigDecimal("922337203685477.5800")))

sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ private object MsSqlServerDialect extends JdbcDialect {
3030
// String is recommend by Microsoft SQL Server for datetimeoffset types in non-MS clients
3131
Option(StringType)
3232
} else {
33-
None
33+
sqlType match {
34+
case java.sql.Types.SMALLINT => Some(ShortType)
35+
case java.sql.Types.REAL => Some(FloatType)
36+
case _ => None
37+
}
3438
}
3539
}
3640

@@ -39,6 +43,7 @@ private object MsSqlServerDialect extends JdbcDialect {
3943
case StringType => Some(JdbcType("NVARCHAR(MAX)", java.sql.Types.NVARCHAR))
4044
case BooleanType => Some(JdbcType("BIT", java.sql.Types.BIT))
4145
case BinaryType => Some(JdbcType("VARBINARY(MAX)", java.sql.Types.VARBINARY))
46+
case ShortType => Some(JdbcType("SMALLINT", java.sql.Types.SMALLINT))
4247
case _ => None
4348
}
4449

sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -895,8 +895,20 @@ class JDBCSuite extends QueryTest
895895
"BIT")
896896
assert(msSqlServerDialect.getJDBCType(BinaryType).map(_.databaseTypeDefinition).get ==
897897
"VARBINARY(MAX)")
898+
assert(msSqlServerDialect.getJDBCType(ShortType).map(_.databaseTypeDefinition).get ==
899+
"SMALLINT")
898900
}
899901

902+
test("SPARK-28152 MsSqlServerDialect catalyst type mapping") {
903+
val msSqlServerDialect = JdbcDialects.get("jdbc:sqlserver")
904+
val metadata = new MetadataBuilder().putLong("scale", 1)
905+
assert(msSqlServerDialect.getCatalystType(java.sql.Types.SMALLINT, "SMALLINT", 1,
906+
metadata).get == ShortType)
907+
assert(msSqlServerDialect.getCatalystType(java.sql.Types.REAL, "REAL", 1,
908+
metadata).get == FloatType)
909+
}
910+
911+
900912
test("table exists query by jdbc dialect") {
901913
val MySQL = JdbcDialects.get("jdbc:mysql://127.0.0.1/db")
902914
val Postgres = JdbcDialects.get("jdbc:postgresql://127.0.0.1/db")

0 commit comments

Comments
 (0)