@@ -32,14 +32,15 @@ import org.apache.hadoop.io.{NullWritable, Writable}
 import org.apache.hadoop.mapred.{JobConf, OutputFormat => MapRedOutputFormat, RecordWriter, Reporter}
 import org.apache.hadoop.mapreduce._
 import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit}
+import org.apache.orc.TypeDescription

 import org.apache.spark.TaskContext
-import org.apache.spark.sql.{Row, SparkSession}
+import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.hive.{HiveInspectors, HiveShim}
-import org.apache.spark.sql.sources.{Filter, _}
+import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.SerializableConfiguration
@@ -83,6 +84,8 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
           classOf[MapRedOutputFormat[_, _]])
     }

+    dataSchema.map(_.name).foreach(checkFieldName)
+
     new OutputWriterFactory {
       override def newInstance(
           path: String,
@@ -169,6 +172,18 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
       }
     }
   }
+
+  private def checkFieldName(name: String): Unit = {
+    try {
+      TypeDescription.fromString(s"struct<$name:int>")
+    } catch {
+      case _: IllegalArgumentException =>
+        throw new AnalysisException(
+          s"""Attribute name "$name" contains invalid character(s).
+             |Please use alias to rename it.
+           """.stripMargin.split("\n").mkString(" ").trim)
+    }
+  }
 }

 private[orc] class OrcSerializer(dataSchema: StructType, conf: Configuration)

Review thread on the new error message:

Member: Nit: Attribute -> Column

Member Author: Thank you for review. Sure.
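Side note on the technique: rather than hard-coding a banned-character list the way Parquet does, the patch probes ORC's own schema parser, so validity stays in sync with whatever ORC actually accepts. A minimal sketch of that probe (the helper name isValidOrcFieldName is ours, not part of the patch):

    import org.apache.orc.TypeDescription

    // Illustrative helper mirroring checkFieldName above: ORC's type parser
    // throws IllegalArgumentException when a field name cannot appear
    // verbatim inside a struct type string.
    def isValidOrcFieldName(name: String): Boolean =
      try {
        TypeDescription.fromString(s"struct<$name:int>")
        true
      } catch {
        case _: IllegalArgumentException => false
      }

    isValidOrcFieldName("col1")  // true
    isValidOrcFieldName("a b")   // false: the space breaks the type string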
@@ -2000,4 +2000,15 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       assert(setOfPath.size() == pathSizeToDeleteOnExit)
     }
   }
+
+  test("SPARK-21912 Creating ORC datasource table should check invalid column names") {
+    withTable("orc1") {
+      Seq(" ", "?", ",", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name =>
+        val m = intercept[AnalysisException] {
+          sql(s"CREATE TABLE orc1 USING ORC AS SELECT 1 `column$name`")
+        }.getMessage
+        assert(m.contains(s"contains invalid character(s)"))
+      }
+    }
+  }
 }

Review thread on the CTAS statement:

Member: This is CTAS. How about CREATE TABLE?

Member Author (@dongjoon-hyun, Sep 5, 2017): Yep. I'll check the code path, too.

Member Author: It seems to be the same situation as with Parquet: CREATE TABLE passes, but SELECT raises an exception.

    scala> sql("CREATE TABLE parquet1(`a b` int) using parquet")
    res1: org.apache.spark.sql.DataFrame = []

    scala> sql("select * from parquet1").show
    org.apache.spark.sql.AnalysisException: Attribute name "a b" contains invalid character(s) among " ,;{}()\n\t=". Please use alias to rename it.;

Member Author: Do we need to add a datasource-specific check in createDataSourceTables for Parquet and ORC?

Member Author (@dongjoon-hyun, Sep 5, 2017): @gatorsmile, I tried the following in CreateDataSourceTableCommand. We can add a check for ParquetFileFormat, but not for OrcFileFormat. Should I change the PR title and scope instead?

    table.provider.get.toLowerCase match {
      case "parquet" =>
        dataSource.schema.map(_.name).foreach(
          org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter.checkFieldName)
      case "orc" =>
        dataSource.schema.map(_.name).foreach(
          org.apache.spark.sql.hive.OrcRelation.checkFieldName)
      case _ => // other providers: no column-name restriction to enforce
    }
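(Aside: the Parquet-side check referenced in this snippet is a character blacklist rather than a parser probe. A rough sketch, reconstructed from the error message in the REPL output above; the helper name is ours, not Spark's:)

    // Sketch only: mirrors the banned set " ,;{}()\n\t=" quoted in the
    // AnalysisException above; the real logic lives in
    // ParquetSchemaConverter.checkFieldName.
    def isValidParquetFieldName(name: String): Boolean =
      !name.matches(".*[ ,;{}()\n\t=].*")

    isValidParquetFieldName("a b")  // false: space is in the banned set
    isValidParquetFieldName("a_b")  // true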

Member Author: I'll try in another way.
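To make the CTAS-vs-CREATE TABLE question concrete, a plain (non-CTAS) variant of the test might look like the sketch below. Whether it actually throws at creation time depends on where the check ends up wired in, which is exactly what the thread is still settling. The table name orc2 and the expected failure are assumptions, not part of the diff.

    // Hypothetical non-CTAS variant (not in this diff); assumes the
    // column-name check also fires on the plain CREATE TABLE path. Per the
    // thread above, Parquet currently lets CREATE TABLE pass and only fails
    // at SELECT time, so this expectation may not hold yet.
    test("SPARK-21912 CREATE TABLE with an invalid column name") {
      withTable("orc2") {
        val m = intercept[AnalysisException] {
          sql("CREATE TABLE orc2(`a b` INT) USING ORC")
        }.getMessage
        assert(m.contains("contains invalid character(s)"))
      }
    }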