apache · maropu · Jun 22, 2015 · Jun 22, 2015 · Jun 22, 2015 · Jun 22, 2015
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -22,10 +22,13 @@ import org.apache.hadoop.hive.serde2.objectinspector._
 import org.apache.hadoop.hive.serde2.objectinspector.primitive._
 import org.apache.hadoop.hive.serde2.{io => hiveIo}
 import org.apache.hadoop.{io => hadoopIo}
+import org.apache.spark.Logging
+import org.apache.spark.annotation.DeveloperApi
 
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.util.DateUtils
 import org.apache.spark.sql.types
+import org.apache.spark.sql.{AnalysisException, types}
 import org.apache.spark.sql.types._
 
 /* Implicit conversions */
@@ -170,7 +173,7 @@ import scala.collection.JavaConversions._
  *       e.g. date_add(printf("%s-%s-%s", a,b,c), 3)
  *       We don't need to unwrap the data for printf and wrap it again and passes in data_add
  */
-private[hive] trait HiveInspectors {
+private[hive] trait HiveInspectors extends Logging {
 
   def javaClassToDataType(clz: Class[_]): DataType = clz match {
     // writable
@@ -214,8 +217,16 @@ private[hive] trait HiveInspectors {
 
     case c: Class[_] if c.isArray => ArrayType(javaClassToDataType(c.getComponentType))
 
+    // list type
+    case c: Class[_] if c == classOf[java.util.List[_]] =>
+      logWarning("Failed to catch a component type in List<> because of type erasure in JVM," +
+        " so you need to cast it into the correct type by yourself")
+      ArrayType(NullType)
+
     // Hive seems to return this for struct types?
     case c: Class[_] if c == classOf[java.lang.Object] => NullType
+
+    case c => throw new AnalysisException(s"Unsupported java type $c")
   }
 
   /**

diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListInt.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListInt.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.execution;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+import java.util.Arrays;
+import java.util.List;
+
+public class UDFToListInt extends UDF {
+    public List<Integer> evaluate(Object o) {
+        return Arrays.asList(1, 2, 3);
+    }
+}
diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListString.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListString.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.execution;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+import java.util.Arrays;
+import java.util.List;
+
+public class UDFToListString extends UDF {
+    public List<String> evaluate(Object o) {
+        return Arrays.asList("data1", "data2", "data3");
+    }
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUdfSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUdfSuite.scala
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ObjectInspectorFactory}
 import org.apache.hadoop.hive.serde2.{AbstractSerDe, SerDeStats}
 import org.apache.hadoop.io.Writable
+import org.apache.spark.sql.types.UTF8String
 import org.apache.spark.sql.{QueryTest, Row}
 import org.apache.spark.sql.hive.test.TestHive
 
@@ -133,6 +134,41 @@ class HiveUdfSuite extends QueryTest {
     TestHive.reset()
   }
 
+  test("UDFToListString") {
+    val testData = TestHive.sparkContext.parallelize(StringCaseClass("") :: Nil).toDF()
+    testData.registerTempTable("inputTable")
+
+    /**
+     * Converts $"..." into UTF8String(...).
+     * TODO: Catalyst (e.g., CatalystTypeConverters) cannot convert UTF8String
+     * into String correctly because of JVM type erasure.
+     */
+    implicit class StringToUtf8(val sc: StringContext) {
+      def u(args: Any*): UTF8String = UTF8String(sc.s(args :_*))
+    }
+
+    sql(s"CREATE TEMPORARY FUNCTION testUDFToListString AS '${classOf[UDFToListString].getName}'")
+    checkAnswer(
+      sql("SELECT testUDFToListString(s) FROM inputTable"),
+      Seq(Row(u"data1" :: u"data2" :: u"data3" :: Nil)))
+    sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFToListString")
+
+    TestHive.reset()
+  }
+
+  test("UDFToListInt") {
+    val testData = TestHive.sparkContext.parallelize(StringCaseClass("") :: Nil).toDF()
+    testData.registerTempTable("inputTable")
+
+    sql(s"CREATE TEMPORARY FUNCTION testUDFToListInt AS '${classOf[UDFToListInt].getName}'")
+    checkAnswer(
+      sql("SELECT testUDFToListInt(s) FROM inputTable"),
+      Seq(Row(1 :: 2 :: 3 :: Nil)))
+    sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFToListInt")
+
+    TestHive.reset()
+  }
+
   test("UDFListListInt") {
     val testData = TestHive.sparkContext.parallelize(
       ListListIntCaseClass(Nil) ::