Skip to content

Commit 1101938

Browse files
committed
[SPARK-27421][SQL] Fix filter for int column and value class java.lang.String when pruning partition column
### What changes were proposed in this pull request?

This PR fixes the filter for an int column and value class java.lang.String when pruning a partition column.

How to reproduce this issue:

```scala
spark.sql("CREATE table test (name STRING) partitioned by (id int) STORED AS PARQUET")
spark.sql("CREATE VIEW test_view as select cast(id as string) as id, name from test")
spark.sql("SELECT * FROM test_view WHERE id = '0'").explain
```

```
20/11/15 06:19:01 INFO audit: ugi=root ip=unknown-ip-addr cmd=get_partitions_by_filter : db=default tbl=test
20/11/15 06:19:01 INFO MetaStoreDirectSql: Unable to push down SQL filter: Cannot push down filter for int column and value class java.lang.String
20/11/15 06:19:01 ERROR SparkSQLDriver: Failed in [SELECT * FROM test_view WHERE id = '0']
java.lang.RuntimeException: Caught Hive MetaException attempting to get partition metadata by filter from Hive. You can set the Spark configuration setting spark.sql.hive.manageFilesourcePartitions to false to work around this problem, however this will result in degraded performance. Please report a bug: https://issues.apache.org/jira/browse/SPARK
  at org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:828)
  at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$getPartitionsByFilter$1(HiveClientImpl.scala:745)
  at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:294)
  at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227)
  at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226)
  at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:276)
  at org.apache.spark.sql.hive.client.HiveClientImpl.getPartitionsByFilter(HiveClientImpl.scala:743)
```

### Why are the changes needed?

Fix bug.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit test.

Closes #30380 from wangyum/SPARK-27421.
Authored-by: Yuming Wang <[email protected]> Signed-off-by: Yuming Wang <[email protected]> (cherry picked from commit 014e1fb) Signed-off-by: Yuming Wang <[email protected]>
1 parent c301d9c commit 1101938

2 files changed

Lines changed: 9 additions & 2 deletions

File tree

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
729729
def unapply(expr: Expression): Option[Attribute] = {
730730
expr match {
731731
case attr: Attribute => Some(attr)
732-
case Cast(child @ AtomicType(), dt: AtomicType, _)
732+
case Cast(child @ IntegralType(), dt: IntegralType, _)
733733
if Cast.canUpCast(child.dataType.asInstanceOf[AtomicType], dt) => unapply(child)
734734
case _ => None
735735
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
2828
import org.apache.spark.sql.catalyst.catalog._
2929
import org.apache.spark.sql.catalyst.dsl.expressions._
3030
import org.apache.spark.sql.catalyst.expressions._
31-
import org.apache.spark.sql.types.{BooleanType, IntegerType, LongType, StructType}
31+
import org.apache.spark.sql.types.{BooleanType, IntegerType, LongType, StringType, StructType}
3232
import org.apache.spark.util.Utils
3333

3434
class HivePartitionFilteringSuite(version: String)
@@ -276,6 +276,13 @@ class HivePartitionFilteringSuite(version: String)
276276
buildClient(new Configuration(), sharesHadoopClasses = false)
277277
}
278278

279+
test("getPartitionsByFilter: chunk in ('ab', 'ba') and ((cast(ds as string)>'20170102')") {
280+
val day = (20170101 to 20170103, 0 to 4, Seq("ab", "ba"))
281+
testMetastorePartitionFiltering(
282+
attr("chunk").in("ab", "ba") && (attr("ds").cast(StringType) > "20170102"),
283+
day :: Nil)
284+
}
285+
279286
private def testMetastorePartitionFiltering(
280287
filterExpr: Expression,
281288
expectedDs: Seq[Int],

0 commit comments

Comments
 (0)