-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-10186][SQL][WIP] Array types using JDBCRDD and postgres #9137
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
72beea6
c55e5ff
bcbafc4
5db8589
2664dc8
755ec14
c07e5f2
8d40495
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -72,7 +72,7 @@ abstract class JdbcDialect { | |
| * or null if the default type mapping should be used. | ||
| */ | ||
| def getCatalystType( | ||
| sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = None | ||
| sqlType: Int, typeName: String, size: Int, scale: Int, md: MetadataBuilder): Option[DataType] = None | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add a scaladoc `@param` entry for the new `scale` parameter. |
||
|
|
||
| /** | ||
| * Retrieve the jdbc / sql type for a given datatype. | ||
|
|
@@ -81,6 +81,24 @@ abstract class JdbcDialect { | |
| */ | ||
| def getJDBCType(dt: DataType): Option[JdbcType] = None | ||
|
|
||
| def getCommonJDBCType(dataType: DataType): Option[JdbcType] = { | ||
| dataType match { | ||
| case IntegerType => Some(JdbcType("INTEGER", java.sql.Types.INTEGER)) | ||
| case LongType => Some(JdbcType("BIGINT", java.sql.Types.BIGINT)) | ||
| case DoubleType => Some(JdbcType("DOUBLE PRECISION", java.sql.Types.DOUBLE)) | ||
| case FloatType => Some(JdbcType("REAL", java.sql.Types.FLOAT)) | ||
| case ShortType => Some(JdbcType("INTEGER", java.sql.Types.SMALLINT)) | ||
| case ByteType => Some(JdbcType("BYTE", java.sql.Types.TINYINT)) | ||
| case BooleanType => Some(JdbcType("BIT(1)", java.sql.Types.BIT)) | ||
| case StringType => Some(JdbcType("TEXT", java.sql.Types.CLOB)) | ||
| case BinaryType => Some(JdbcType("BLOB", java.sql.Types.BLOB)) | ||
| case TimestampType => Some(JdbcType("TIMESTAMP", java.sql.Types.TIMESTAMP)) | ||
| case DateType => Some(JdbcType("DATE", java.sql.Types.DATE)) | ||
| case t: DecimalType => Some(JdbcType(s"DECIMAL(${t.precision},${t.scale})", java.sql.Types.DECIMAL)) | ||
| case _ => None | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Quotes the identifier. This is used to put quotes around the identifier in case the column | ||
| * name is a reserved keyword, or in case it contains characters that require quotes (e.g. space). | ||
|
|
@@ -170,8 +188,8 @@ class AggregatedDialect(dialects: List[JdbcDialect]) extends JdbcDialect { | |
| dialects.map(_.canHandle(url)).reduce(_ && _) | ||
|
|
||
| override def getCatalystType( | ||
| sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { | ||
| dialects.flatMap(_.getCatalystType(sqlType, typeName, size, md)).headOption | ||
| sqlType: Int, typeName: String, size: Int, scale: Int, md: MetadataBuilder): Option[DataType] = { | ||
| dialects.flatMap(_.getCatalystType(sqlType, typeName, size, scale, md)).headOption | ||
| } | ||
|
|
||
| override def getJDBCType(dt: DataType): Option[JdbcType] = { | ||
|
|
@@ -196,7 +214,7 @@ case object NoopDialect extends JdbcDialect { | |
| case object PostgresDialect extends JdbcDialect { | ||
| override def canHandle(url: String): Boolean = url.startsWith("jdbc:postgresql") | ||
| override def getCatalystType( | ||
| sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { | ||
| sqlType: Int, typeName: String, size: Int, scale: Int, md: MetadataBuilder): Option[DataType] = { | ||
| if (sqlType == Types.BIT && typeName.equals("bit") && size != 1) { | ||
| Some(BinaryType) | ||
| } else if (sqlType == Types.OTHER && typeName.equals("cidr")) { | ||
|
|
@@ -207,13 +225,39 @@ case object PostgresDialect extends JdbcDialect { | |
| Some(StringType) | ||
| } else if (sqlType == Types.OTHER && typeName.equals("jsonb")) { | ||
| Some(StringType) | ||
| } else if (sqlType == Types.OTHER && typeName.equals("uuid")) { | ||
| Some(StringType) | ||
| } else if (sqlType == Types.ARRAY) { | ||
| typeName match { | ||
|
||
| case "_bit" | "_bool" => Some(ArrayType(BooleanType)) | ||
| case "_int2" => Some(ArrayType(ShortType)) | ||
| case "_int4" => Some(ArrayType(IntegerType)) | ||
| case "_int8" | "_oid" => Some(ArrayType(LongType)) | ||
| case "_float4" => Some(ArrayType(FloatType)) | ||
| case "_money" | "_float8" => Some(ArrayType(DoubleType)) | ||
| case "_text" | "_varchar" | "_char" | "_bpchar" | "_name" => Some(ArrayType(StringType)) | ||
| case "_bytea" => Some(ArrayType(BinaryType)) | ||
| case "_timestamp" | "_timestamptz" | "_time" | "_timetz" => Some(ArrayType(TimestampType)) | ||
| case "_date" => Some(ArrayType(DateType)) | ||
| case "_numeric" | ||
| if size != 0 || scale != 0 => Some(ArrayType(DecimalType(size, scale))) | ||
| case "_numeric" => Some(ArrayType(DecimalType.SYSTEM_DEFAULT)) | ||
| case _ => throw new IllegalArgumentException(s"Unhandled postgres array type $typeName") | ||
| } | ||
| } else None | ||
| } | ||
|
|
||
| override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { | ||
| case StringType => Some(JdbcType("TEXT", java.sql.Types.CHAR)) | ||
| case BinaryType => Some(JdbcType("BYTEA", java.sql.Types.BINARY)) | ||
| case BooleanType => Some(JdbcType("BOOLEAN", java.sql.Types.BOOLEAN)) | ||
| case ArrayType(t, _) => | ||
| val subtype = getJDBCType(t).map(_.databaseTypeDefinition).getOrElse( | ||
| getCommonJDBCType(t).map(_.databaseTypeDefinition).getOrElse( | ||
| throw new IllegalArgumentException(s"Unexpected JDBC array subtype $t") | ||
| ) | ||
| ) | ||
| Some(JdbcType(s"$subtype[]", java.sql.Types.ARRAY)) | ||
| case _ => None | ||
| } | ||
|
|
||
|
|
@@ -231,7 +275,7 @@ case object PostgresDialect extends JdbcDialect { | |
| case object MySQLDialect extends JdbcDialect { | ||
| override def canHandle(url : String): Boolean = url.startsWith("jdbc:mysql") | ||
| override def getCatalystType( | ||
| sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { | ||
| sqlType: Int, typeName: String, size: Int, scale: Int, md: MetadataBuilder): Option[DataType] = { | ||
| if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) { | ||
| // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as | ||
| // byte arrays instead of longs. | ||
|
|
@@ -276,7 +320,7 @@ case object DB2Dialect extends JdbcDialect { | |
| case object MsSqlServerDialect extends JdbcDialect { | ||
| override def canHandle(url: String): Boolean = url.startsWith("jdbc:sqlserver") | ||
| override def getCatalystType( | ||
| sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { | ||
| sqlType: Int, typeName: String, size: Int, scale: Int, md: MetadataBuilder): Option[DataType] = { | ||
| if (typeName.contains("datetimeoffset")) { | ||
| // String is recommend by Microsoft SQL Server for datetimeoffset types in non-MS clients | ||
| Some(StringType) | ||
|
|
@@ -298,7 +342,7 @@ case object MsSqlServerDialect extends JdbcDialect { | |
| case object DerbyDialect extends JdbcDialect { | ||
| override def canHandle(url: String): Boolean = url.startsWith("jdbc:derby") | ||
| override def getCatalystType( | ||
| sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { | ||
| sqlType: Int, typeName: String, size: Int, scale: Int, md: MetadataBuilder): Option[DataType] = { | ||
| if (sqlType == Types.REAL) Option(FloatType) else None | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems to me that we need to check in advance whether the input types are valid for the target database, e.g., in JdbcUtils#saveTable.
JdbcUtils#savePartition should then simply write the input data using the types it has been given.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the particular dialect does not support these types, saveTable should throw an exception when building the nullTypes array.