Skip to content

Commit b1436c7

Browse files
rxin authored and gatorsmile committed
[SPARK-21059][SQL] LikeSimplification can NPE on null pattern
## What changes were proposed in this pull request?

This patch fixes a bug that can cause a NullPointerException in LikeSimplification when the pattern for LIKE is null.

## How was this patch tested?

Added a new unit test case in LikeSimplificationSuite.

Author: Reynold Xin <rxin@databricks.com>

Closes #18273 from rxin/SPARK-21059.
1 parent 32818d9 commit b1436c7

2 files changed

Lines changed: 28 additions & 17 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala

Lines changed: 21 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -383,22 +383,27 @@ object LikeSimplification extends Rule[LogicalPlan] {
383383

384384
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
385385
case Like(input, Literal(pattern, StringType)) =>
386-
pattern.toString match {
387-
case startsWith(prefix) if !prefix.endsWith("\\") =>
388-
StartsWith(input, Literal(prefix))
389-
case endsWith(postfix) =>
390-
EndsWith(input, Literal(postfix))
391-
// 'a%a' pattern is basically same with 'a%' && '%a'.
392-
// However, the additional `Length` condition is required to prevent 'a' match 'a%a'.
393-
case startsAndEndsWith(prefix, postfix) if !prefix.endsWith("\\") =>
394-
And(GreaterThanOrEqual(Length(input), Literal(prefix.size + postfix.size)),
395-
And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix))))
396-
case contains(infix) if !infix.endsWith("\\") =>
397-
Contains(input, Literal(infix))
398-
case equalTo(str) =>
399-
EqualTo(input, Literal(str))
400-
case _ =>
401-
Like(input, Literal.create(pattern, StringType))
386+
if (pattern == null) {
387+
// If pattern is null, return null value directly, since "col like null" == null.
388+
Literal(null, BooleanType)
389+
} else {
390+
pattern.toString match {
391+
case startsWith(prefix) if !prefix.endsWith("\\") =>
392+
StartsWith(input, Literal(prefix))
393+
case endsWith(postfix) =>
394+
EndsWith(input, Literal(postfix))
395+
// 'a%a' pattern is basically same with 'a%' && '%a'.
396+
// However, the additional `Length` condition is required to prevent 'a' match 'a%a'.
397+
case startsAndEndsWith(prefix, postfix) if !prefix.endsWith("\\") =>
398+
And(GreaterThanOrEqual(Length(input), Literal(prefix.length + postfix.length)),
399+
And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix))))
400+
case contains(infix) if !infix.endsWith("\\") =>
401+
Contains(input, Literal(infix))
402+
case equalTo(str) =>
403+
EqualTo(input, Literal(str))
404+
case _ =>
405+
Like(input, Literal.create(pattern, StringType))
406+
}
402407
}
403408
}
404409
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,13 @@
1717

1818
package org.apache.spark.sql.catalyst.optimizer
1919

20-
/* Implicit conversions */
2120
import org.apache.spark.sql.catalyst.dsl.expressions._
2221
import org.apache.spark.sql.catalyst.dsl.plans._
2322
import org.apache.spark.sql.catalyst.expressions._
2423
import org.apache.spark.sql.catalyst.plans.logical._
2524
import org.apache.spark.sql.catalyst.plans.PlanTest
2625
import org.apache.spark.sql.catalyst.rules._
26+
import org.apache.spark.sql.types.{BooleanType, StringType}
2727

2828
class LikeSimplificationSuite extends PlanTest {
2929

@@ -100,4 +100,10 @@ class LikeSimplificationSuite extends PlanTest {
100100

101101
comparePlans(optimized, correctAnswer)
102102
}
103+
104+
test("null pattern") {
105+
val originalQuery = testRelation.where('a like Literal(null, StringType)).analyze
106+
val optimized = Optimize.execute(originalQuery)
107+
comparePlans(optimized, testRelation.where(Literal(null, BooleanType)).analyze)
108+
}
103109
}

0 commit comments

Comments
 (0)