Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -1457,8 +1457,31 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
*/
override def visitColType(ctx: ColTypeContext): StructField = withOrigin(ctx) {
  import ctx._

  val builder = new MetadataBuilder
  // Add comment to metadata: a trailing `COMMENT '...'` clause on the column, if present.
  if (STRING != null) {
    builder.putString("comment", string(STRING))
  }
  // Add Hive type string to metadata. CHAR/VARCHAR have no direct Spark SQL counterpart
  // (they are mapped to StringType), so the original Hive type — including its length,
  // e.g. "varchar(10)" — is preserved under HIVE_TYPE_STRING for round-tripping.
  dataType match {
    case p: PrimitiveDataTypeContext =>
      val dt = p.identifier.getText.toLowerCase
      (dt, p.INTEGER_VALUE().asScala.toList) match {
        case ("varchar" | "char", Nil) =>
          // No explicit length, e.g. plain `CHAR` / `VARCHAR`.
          builder.putString(HIVE_TYPE_STRING, dt)
        case ("varchar" | "char", size :: Nil) =>
          // Single length argument, e.g. `VARCHAR(10)` -> "varchar(10)".
          builder.putString(HIVE_TYPE_STRING, dt + "(" + size.getText + ")")
        case _ =>
        // Any other primitive type maps cleanly to Spark SQL; no metadata needed.
      }
    case _ =>
    // Complex types (array/map/struct) carry no Hive type string here.
  }

  StructField(
    identifier.getText,
    typedVisit(dataType),
    nullable = true,
    builder.build())
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,10 @@ package org.apache.spark.sql
* Contains a type system for attributes produced by relations, including complex types like
* structs, arrays and maps.
*/
package object types {
  /**
   * Metadata key used to store the raw Hive type string. This is relevant for datatypes that do
   * not have a direct Spark SQL counterpart, such as CHAR and VARCHAR (both mapped to
   * StringType); the original type — including its length, e.g. "char(10)" — is kept in a
   * StructField's metadata under this key.
   */
  val HIVE_TYPE_STRING = "HIVE_TYPE_STRING"
}
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,10 @@ class TableScanSuite extends DataSourceTest with SharedSQLContext {
(2 to 10).map(i => Row(i, i - 1)).toSeq)

test("Schema and all fields") {
def hiveMetadata(dt: String): Metadata = {
new MetadataBuilder().putString("HIVE_TYPE_STRING", dt).build()
}

val expectedSchema = StructType(
StructField("string$%Field", StringType, true) ::
StructField("binaryField", BinaryType, true) ::
Expand All @@ -217,8 +221,8 @@ class TableScanSuite extends DataSourceTest with SharedSQLContext {
StructField("decimalField2", DecimalType(9, 2), true) ::
StructField("dateField", DateType, true) ::
StructField("timestampField", TimestampType, true) ::
StructField("varcharField", StringType, true) ::
StructField("charField", StringType, true) ::
StructField("varcharField", StringType, true, hiveMetadata("varchar(12)")) ::
StructField("charField", StringType, true, hiveMetadata("char(18)")) ::
StructField("arrayFieldSimple", ArrayType(IntegerType), true) ::
StructField("arrayFieldComplex",
ArrayType(
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,28 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndA
hiveClient.runSqlHive("DROP TABLE IF EXISTS orc_varchar")
}
}

test("read varchar column from orc tables created by hive") {
  try {
    // This is an ORC file with a STRING, a CHAR(10) and a VARCHAR(10) column that has been
    // created using Hive 1.2.1. It is a checked-in fixture rather than being generated at
    // test time, so the test exercises reading Hive-written ORC metadata directly.
    val hiveOrc = new File(Thread.currentThread().getContextClassLoader
      .getResource(s"data/files/orc/").getFile).toURI
    sql(
      s"""
         |CREATE EXTERNAL TABLE test_hive_orc(
         |  a STRING,
         |  b CHAR(10),
         |  c VARCHAR(10)
         |)
         |STORED AS ORC
         |LOCATION '$hiveOrc'
       """.stripMargin)
    // NOTE(review): CHAR(10) values read back right-padded with spaces to the declared
    // length; the scraped diff collapsed the padding — assumed "b" + 9 spaces, confirm
    // against the fixture file.
    checkAnswer(spark.table("test_hive_orc"), Row("a", "b         ", "c"))
  } finally {
    sql("DROP TABLE IF EXISTS test_hive_orc")
  }
}
}

class OrcSourceSuite extends OrcSuite {
Expand Down