Skip to content
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.apache.spark.sql.execution.datasources.{BucketSpec, CreateTableUsingA
import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils
import org.apache.spark.sql.execution.streaming.{MemoryPlan, MemorySink, StreamExecution}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.jdbc.JdbcDialects
import org.apache.spark.sql.sources.HadoopFsRelation
import org.apache.spark.util.Utils

Expand Down Expand Up @@ -496,7 +497,7 @@ final class DataFrameWriter private[sql](df: DataFrame) {
// connectionProperties should override settings in extraOptions
props.putAll(connectionProperties)
val conn = JdbcUtils.createConnectionFactory(url, props)()

val dialect = JdbcDialects.get(url)
try {
var tableExists = JdbcUtils.tableExists(conn, url, table)

Expand All @@ -515,7 +516,7 @@ final class DataFrameWriter private[sql](df: DataFrame) {

// Create the table if the table didn't exist.
if (!tableExists) {
val schema = JdbcUtils.schemaString(df, url)
val schema = JdbcUtils.schemaString(dialect, df, url)
val sql = s"CREATE TABLE $table ($schema)"
val statement = conn.createStatement
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,9 @@ object JdbcUtils extends Logging {
/**
* Returns a PreparedStatement that inserts a row into table via conn.
*/
def insertStatement(conn: Connection, table: String, rddSchema: StructType): PreparedStatement = {
val columns = rddSchema.fields.map(_.name).mkString(",")
def insertStatement(dialect: JdbcDialect, conn: Connection, table: String, rddSchema: StructType)
: PreparedStatement = {
val columns = rddSchema.fields.map(f => quoteColumnName(dialect, f.name)).mkString(",")
val placeholders = rddSchema.fields.map(_ => "?").mkString(",")
val sql = s"INSERT INTO $table ($columns) VALUES ($placeholders)"
conn.prepareStatement(sql)
Expand Down Expand Up @@ -169,7 +170,7 @@ object JdbcUtils extends Logging {
if (supportsTransactions) {
conn.setAutoCommit(false) // Everything in the same db transaction.
}
val stmt = insertStatement(conn, table, rddSchema)
val stmt = insertStatement(dialect, conn, table, rddSchema)
try {
var rowCount = 0
while (iterator.hasNext) {
Expand Down Expand Up @@ -245,14 +246,24 @@ object JdbcUtils extends Logging {
Array[Byte]().iterator
}

/**
 * Quotes a column name by delegating to the supplied dialect's `quoteIdentifier`.
 *
 * @param dialect the JDBC dialect whose quoting rules are applied
 * @param columnName the raw, unquoted column name
 * @return the value produced by `dialect.quoteIdentifier` for `columnName`
 */
private def quoteColumnName(dialect: JdbcDialect, columnName: String): String =
  dialect.quoteIdentifier(columnName)

/**
* Compute the schema string for this RDD.
*/
def schemaString(df: DataFrame, url: String): String = {
def schemaString(dialect: JdbcDialect, df: DataFrame, url: String): String = {
val sb = new StringBuilder()
val dialect = JdbcDialects.get(url)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the new dialect you're passing in different from this one in some way?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The purpose of passing in the dialect is to quote column names properly for the target data source. Any suggestions?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You pass in a parameter named dialect to the schemaString method, but there's also the dialect that comes from JdbcDialects.get(url) --- that's the duplicate I was trying to point out

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing that out. I've modified the code. Please take a look.

df.schema.fields foreach { field =>
val name = field.name
val name = quoteColumnName(dialect, field.name)
val typ: String = getJdbcType(field.dataType, dialect).databaseTypeDefinition
val nullable = if (field.nullable) "" else "NOT NULL"
sb.append(s", $name $typ $nullable")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ class JDBCWriteSuite extends SharedSQLContext with BeforeAndAfter {
conn1.prepareStatement("insert into test.people values ('mary', 2)").executeUpdate()
conn1.prepareStatement("drop table if exists test.people1").executeUpdate()
conn1.prepareStatement(
"create table test.people1 (name TEXT(32) NOT NULL, theid INTEGER NOT NULL)").executeUpdate()
"create table test.people1 (name TEXT(32) NOT NULL, `the id` INTEGER NOT NULL)")
.executeUpdate()
conn1.commit()

sql(
Expand Down Expand Up @@ -151,4 +152,10 @@ class JDBCWriteSuite extends SharedSQLContext with BeforeAndAfter {
assert(2 === sqlContext.read.jdbc(url1, "TEST.PEOPLE1", properties).count)
assert(2 === sqlContext.read.jdbc(url1, "TEST.PEOPLE1", properties).collect()(0).length)
}

test("SPARK-14460: Insert into table with column containing space") {
  // Build the input frame from the suite's shared fixtures (arr2x2 / schema2 are
  // defined elsewhere in the suite).
  val inputRows = sparkContext.parallelize(arr2x2)
  val frame = sqlContext.createDataFrame(inputRows, schema2)

  // NOTE(review): PEOPLE1 is presumably the table registered over TEST.PEOPLE1 in the
  // suite setup, whose second column is named `the id` -- confirm against the setup code.
  frame.write.insertInto("PEOPLE1")

  // Read the table back through JDBC and check that two rows are present.
  val writtenRowCount = sqlContext.read.jdbc(url1, "TEST.PEOPLE1", properties).count
  assert(2 === writtenRowCount)
}
}