Skip to content

Commit 1821fc1

Browse files
maropu authored and marmbrus committed
[SPARK-6747] [SQL] Throw an AnalysisException when unsupported Java list types used in Hive UDF
The current implementation cannot handle List<> as the return type of a Hive UDF and throws an uninformative scala.MatchError. Consider the following UDF: public class UDFToListString extends UDF { public List<String> evaluate(Object o) { return Arrays.asList("xxx", "yyy", "zzz"); } } When this UDF is used, a scala.MatchError is thrown as follows: scala.MatchError: interface java.util.List (of class java.lang.Class) at org.apache.spark.sql.hive.HiveInspectors$class.javaClassToDataType(HiveInspectors.scala:174) at org.apache.spark.sql.hive.HiveSimpleUdf.javaClassToDataType(hiveUdfs.scala:76) at org.apache.spark.sql.hive.HiveSimpleUdf.dataType$lzycompute(hiveUdfs.scala:106) at org.apache.spark.sql.hive.HiveSimpleUdf.dataType(hiveUdfs.scala:106) at org.apache.spark.sql.catalyst.expressions.Alias.toAttribute(namedExpressions.scala:131) at org.apache.spark.sql.catalyst.planning.PhysicalOperation$$anonfun$collectAliases$1.applyOrElse(patterns.scala:95) at org.apache.spark.sql.catalyst.planning.PhysicalOperation$$anonfun$collectAliases$1.applyOrElse(patterns.scala:94) at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:33) at scala.collection.TraversableLike$$anonfun$collect$1.apply(TraversableLike.scala:278) ... To make the failure easier for UDF developers to understand, we should throw a more suitable exception instead.
Author: Takeshi YAMAMURO <[email protected]> Closes #7248 from maropu/FixBugInHiveInspectors and squashes the following commits: 1c3df2a [Takeshi YAMAMURO] Fix comments 56305de [Takeshi YAMAMURO] Fix conflicts 92ed7a6 [Takeshi YAMAMURO] Throw an exception when java list type used 2844a8e [Takeshi YAMAMURO] Apply comments 7114a47 [Takeshi YAMAMURO] Add TODO comments in UDFToListString of HiveUdfSuite fdb2ae4 [Takeshi YAMAMURO] Add StringToUtf8 to comvert String into UTF8String af61f2e [Takeshi YAMAMURO] Remove a new type 7f812fd [Takeshi YAMAMURO] Fix code-style errors 6984bf4 [Takeshi YAMAMURO] Apply review comments 93e3d4e [Takeshi YAMAMURO] Add a blank line at the end of UDFToListString ee232db [Takeshi YAMAMURO] Support List as a return type in Hive UDF 1e82316 [Takeshi YAMAMURO] Apply comments 21e8763 [Takeshi YAMAMURO] Add TODO comments in UDFToListString of HiveUdfSuite a488712 [Takeshi YAMAMURO] Add StringToUtf8 to comvert String into UTF8String 1c7b9d1 [Takeshi YAMAMURO] Remove a new type f965c34 [Takeshi YAMAMURO] Fix code-style errors 9406416 [Takeshi YAMAMURO] Apply review comments e21ce7e [Takeshi YAMAMURO] Add a blank line at the end of UDFToListString e553f10 [Takeshi YAMAMURO] Support List as a return type in Hive UDF
1 parent 929dfa2 commit 1821fc1

4 files changed

Lines changed: 98 additions & 2 deletions

File tree

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ import org.apache.hadoop.{io => hadoopIo}
2626

2727
import org.apache.spark.sql.catalyst.expressions._
2828
import org.apache.spark.sql.catalyst.util.DateTimeUtils
29-
import org.apache.spark.sql.types
3029
import org.apache.spark.sql.types._
30+
import org.apache.spark.sql.{AnalysisException, types}
3131
import org.apache.spark.unsafe.types.UTF8String
3232

3333
/* Implicit conversions */
@@ -218,6 +218,14 @@ private[hive] trait HiveInspectors {
218218

219219
// Hive seems to return this for struct types?
220220
case c: Class[_] if c == classOf[java.lang.Object] => NullType
221+
222+
// java list type unsupported
223+
case c: Class[_] if c == classOf[java.util.List[_]] =>
224+
throw new AnalysisException(
225+
"List type in java is unsupported because " +
226+
"JVM type erasure makes spark fail to catch a component type in List<>")
227+
228+
case c => throw new AnalysisException(s"Unsupported java type $c")
221229
}
222230

223231
/**
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.hive.execution;
19+
20+
import org.apache.hadoop.hive.ql.exec.UDF;
21+
22+
import java.util.Arrays;
23+
import java.util.List;
24+
25+
/**
 * Hive UDF test fixture whose {@code evaluate} method declares
 * {@code List<Integer>} as its return type.
 *
 * The argument is ignored; every call returns the fixed list [1, 2, 3].
 */
public class UDFToListInt extends UDF {
26+
public List<Integer> evaluate(Object o) {
27+
return Arrays.asList(1, 2, 3);
28+
}
29+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.hive.execution;
19+
20+
import org.apache.hadoop.hive.ql.exec.UDF;
21+
22+
import java.util.Arrays;
23+
import java.util.List;
24+
25+
/**
 * Hive UDF test fixture whose {@code evaluate} method declares
 * {@code List<String>} as its return type.
 *
 * The argument is ignored; every call returns the fixed list
 * ["data1", "data2", "data3"].
 */
public class UDFToListString extends UDF {
26+
public List<String> evaluate(Object o) {
27+
return Arrays.asList("data1", "data2", "data3");
28+
}
29+
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
2828
import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ObjectInspectorFactory}
2929
import org.apache.hadoop.hive.serde2.{AbstractSerDe, SerDeStats}
3030
import org.apache.hadoop.io.Writable
31-
import org.apache.spark.sql.{QueryTest, Row}
31+
import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
3232
import org.apache.spark.sql.hive.test.TestHive
3333

3434
import org.apache.spark.util.Utils
@@ -133,6 +133,36 @@ class HiveUDFSuite extends QueryTest {
133133
TestHive.reset()
134134
}
135135

136+
// Checks that querying a Hive UDF whose evaluate() returns java.util.List<String>
// fails with an AnalysisException carrying the "List type in java is unsupported" message.
test("UDFToListString") {
137+
// Single-row temp table providing the column `s` that the UDF is applied to.
val testData = TestHive.sparkContext.parallelize(StringCaseClass("") :: Nil).toDF()
138+
testData.registerTempTable("inputTable")
139+
140+
sql(s"CREATE TEMPORARY FUNCTION testUDFToListString AS '${classOf[UDFToListString].getName}'")
141+
// The sql(...) call itself is expected to throw; no action on the result is needed.
val errMsg = intercept[AnalysisException] {
142+
sql("SELECT testUDFToListString(s) FROM inputTable")
143+
}
144+
// NOTE(review): the expected text ends with ';' — presumably appended by
// AnalysisException.getMessage rather than part of the thrown message; confirm.
assert(errMsg.getMessage === "List type in java is unsupported because " +
145+
"JVM type erasure makes spark fail to catch a component type in List<>;")
146+
147+
// Cleanup: drop the temporary function, then call TestHive.reset()
// (presumably restores shared test state — confirm).
sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFToListString")
148+
TestHive.reset()
149+
}
150+
151+
// Checks that querying a Hive UDF whose evaluate() returns java.util.List<Integer>
// fails with an AnalysisException carrying the "List type in java is unsupported" message.
test("UDFToListInt") {
152+
// Single-row temp table providing the column `s` that the UDF is applied to.
val testData = TestHive.sparkContext.parallelize(StringCaseClass("") :: Nil).toDF()
153+
testData.registerTempTable("inputTable")
154+
155+
sql(s"CREATE TEMPORARY FUNCTION testUDFToListInt AS '${classOf[UDFToListInt].getName}'")
156+
// The sql(...) call itself is expected to throw; no action on the result is needed.
val errMsg = intercept[AnalysisException] {
157+
sql("SELECT testUDFToListInt(s) FROM inputTable")
158+
}
159+
// NOTE(review): the expected text ends with ';' — presumably appended by
// AnalysisException.getMessage rather than part of the thrown message; confirm.
assert(errMsg.getMessage === "List type in java is unsupported because " +
160+
"JVM type erasure makes spark fail to catch a component type in List<>;")
161+
162+
// Cleanup: drop the temporary function, then call TestHive.reset()
// (presumably restores shared test state — confirm).
sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFToListInt")
163+
TestHive.reset()
164+
}
165+
136166
test("UDFListListInt") {
137167
val testData = TestHive.sparkContext.parallelize(
138168
ListListIntCaseClass(Nil) ::

0 commit comments

Comments
 (0)