From 3d77131f820ac825951b1e00d5653bad1a883ad5 Mon Sep 17 00:00:00 2001 From: kujon Date: Wed, 22 Jul 2020 17:19:15 +0100 Subject: [PATCH 1/3] fix: postgresql: added support for blank-padded, fixed length character arrays --- .../main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala | 2 +- .../src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index c8d8a3392128..a1ce25a0464c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -57,7 +57,7 @@ private object PostgresDialect extends JdbcDialect { case "int8" | "oid" => Some(LongType) case "float4" => Some(FloatType) case "money" | "float8" => Some(DoubleType) - case "text" | "varchar" | "char" | "cidr" | "inet" | "json" | "jsonb" | "uuid" => + case "text" | "varchar" | "char" | "bpchar" | "cidr" | "inet" | "json" | "jsonb" | "uuid" => Some(StringType) case "bytea" => Some(BinaryType) case "timestamp" | "timestamptz" | "time" | "timetz" => Some(TimestampType) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 0e3dd4447c3f..b95d8b26cc7b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -851,6 +851,8 @@ class JDBCSuite extends QueryTest assert(Postgres.getCatalystType(java.sql.Types.OTHER, "jsonb", 1, null) === Some(StringType)) assert(Postgres.getCatalystType(java.sql.Types.ARRAY, "_numeric", 0, md) == Some(ArrayType(DecimalType.SYSTEM_DEFAULT))) + assert(Postgres.getCatalystType(java.sql.Types.ARRAY, "_bpchar", 64, md) == + Some(ArrayType(StringType))) assert(Postgres.getJDBCType(FloatType).map(_.databaseTypeDefinition).get == "FLOAT4") assert(Postgres.getJDBCType(DoubleType).map(_.databaseTypeDefinition).get == "FLOAT8") assert(Postgres.getJDBCType(ByteType).map(_.databaseTypeDefinition).get == "SMALLINT") From 6bdc087bbbd6e0865192ebd7b6fc3d34e9ca308f Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 10 Aug 2020 09:43:13 +0900 Subject: [PATCH 2/3] Revert "fix: postgresql: added support for blank-padded, fixed length character arrays" This reverts commit 3d77131f820ac825951b1e00d5653bad1a883ad5. --- .../main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala | 2 +- .../src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index a1ce25a0464c..c8d8a3392128 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -57,7 +57,7 @@ private object PostgresDialect extends JdbcDialect { case "int8" | "oid" => Some(LongType) case "float4" => Some(FloatType) case "money" | "float8" => Some(DoubleType) - case "text" | "varchar" | "char" | "bpchar" | "cidr" | "inet" | "json" | "jsonb" | "uuid" => + case "text" | "varchar" | "char" | "cidr" | "inet" | "json" | "jsonb" | "uuid" => Some(StringType) case "bytea" => Some(BinaryType) case "timestamp" | "timestamptz" | "time" | "timetz" => Some(TimestampType) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index b95d8b26cc7b..0e3dd4447c3f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -851,8 +851,6 @@ class JDBCSuite extends QueryTest assert(Postgres.getCatalystType(java.sql.Types.OTHER, "jsonb", 1, null) === Some(StringType)) assert(Postgres.getCatalystType(java.sql.Types.ARRAY, "_numeric", 0, md) == Some(ArrayType(DecimalType.SYSTEM_DEFAULT))) - assert(Postgres.getCatalystType(java.sql.Types.ARRAY, "_bpchar", 64, md) == - Some(ArrayType(StringType))) assert(Postgres.getJDBCType(FloatType).map(_.databaseTypeDefinition).get == "FLOAT4") assert(Postgres.getJDBCType(DoubleType).map(_.databaseTypeDefinition).get == "FLOAT8") assert(Postgres.getJDBCType(ByteType).map(_.databaseTypeDefinition).get == "SMALLINT") From dd382118446dfa6f5b69f9ec51b03cb8e59008a9 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 10 Aug 2020 09:43:54 +0900 Subject: [PATCH 3/3] Add tests --- .../sql/jdbc/PostgresIntegrationSuite.scala | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index 6611bc2d19ed..c3920bdeb28e 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -76,8 +76,13 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { "'172.16.0.42']::inet[], ARRAY['192.168.0.0/24', '10.1.0.0/16']::cidr[], " + """ARRAY['{"a": "foo", "b": "bar"}', '{"a": 1, "b": 2}']::json[], """ + """ARRAY['{"a": 1, "b": 2, "c": 3}']::jsonb[])""" - ) - .executeUpdate() + ).executeUpdate() + + conn.prepareStatement("CREATE TABLE char_types (" + + "c0 char(4), c1 character(4), c2 character varying(4), c3 varchar(4), c4 bpchar)" + ).executeUpdate() + conn.prepareStatement("INSERT INTO char_types VALUES " + + "('abcd', 'efgh', 'ijkl', 'mnop', 'q')").executeUpdate() } test("Type mapping for various types") { @@ -218,4 +223,16 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getShort(0) === 1) assert(rows(0).getShort(1) === 2) } + + test("character type tests") { + val df = sqlContext.read.jdbc(jdbcUrl, "char_types", new Properties) + val row = df.collect() + assert(row.length == 1) + assert(row(0).length === 5) + assert(row(0).getString(0) === "abcd") + assert(row(0).getString(1) === "efgh") + assert(row(0).getString(2) === "ijkl") + assert(row(0).getString(3) === "mnop") + assert(row(0).getString(4) === "q") + } }