@@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
3030import org .apache .spark .sql .catalyst .rules ._
3131import org .apache .spark .sql .internal .SQLConf
3232import org .apache .spark .sql .types ._
33+ import org .apache .spark .unsafe .types .UTF8String
3334
3435/*
3536 * Optimization rules defined in this file should not affect the structure of the logical plan.
@@ -542,36 +543,68 @@ object LikeSimplification extends Rule[LogicalPlan] {
542543 private val contains = " %([^_%]+)%" .r
543544 private val equalTo = " ([^_%]*)" .r
544545
/**
 * Attempts to replace a single LIKE pattern with an equivalent, cheaper string predicate.
 *
 * @param input expression whose value is tested against the pattern
 * @param pattern text of the LIKE pattern
 * @param escapeChar escape character in effect for the pattern; defaults to a backslash
 * @return `Some(replacement)` when the pattern is matched by one of the `startsWith`,
 *         `endsWith`, `startsAndEndsWith`, `contains` or `equalTo` extractors; `None` when
 *         the pattern contains `escapeChar` or is matched by none of them
 */
private def simplifyLike(
    input: Expression, pattern: String, escapeChar: Char = '\\'): Option[Expression] = {
  pattern match {
    // A pattern holding the escape character is in one of three situations:
    //   1. it contains an invalid escape sequence, e.g. 'm\aca'
    //   2. it contains an escaped wildcard character, e.g. 'ma\%ca'
    //   3. it contains an escaped escape character, e.g. 'ma\\ca'
    // Some of these could still be optimized after resolving the escapes, but for
    // simplicity the rule is skipped whenever the escape character appears at all.
    case escaped if escaped.contains(escapeChar) => None
    case startsWith(prefix) => Some(StartsWith(input, Literal(prefix)))
    case endsWith(postfix) => Some(EndsWith(input, Literal(postfix)))
    case startsAndEndsWith(prefix, postfix) =>
      // 'a%a' is essentially 'a%' AND '%a'; the extra length bound stops the single
      // string 'a' from satisfying both halves at once.
      val minLength = Literal(prefix.length + postfix.length)
      val longEnough = GreaterThanOrEqual(Length(input), minLength)
      val bothEnds = And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix)))
      Some(And(longEnough, bothEnds))
    case contains(infix) => Some(Contains(input, Literal(infix)))
    case equalTo(str) => Some(EqualTo(input, Literal(str)))
    case _ => None
  }
}
575+
/**
 * Splits the patterns of a multi-pattern LIKE expression into those that `simplifyLike`
 * can rewrite and those it cannot, then recombines the rewritten predicates with a copy
 * of the original expression that keeps only the remaining patterns.
 *
 * @param child expression whose value is tested against every pattern
 * @param patterns the patterns of the multi-pattern LIKE expression; entries may be null
 * @param multi the original expression, returned unchanged when nothing can be rewritten
 * @return `multi` itself if no pattern was simplified; otherwise an `And` (for `LikeAll` /
 *         `NotLikeAll`) or `Or` (for `LikeAny` / `NotLikeAny`) of the rewritten predicates
 *         and the reduced multi-pattern expression
 */
private def simplifyMultiLike(
    child: Expression, patterns: Seq[UTF8String], multi: MultiLikeBase): Expression = {
  // A null pattern cannot be simplified. Wrapping it in Option keeps it among the
  // remaining patterns instead of throwing a NullPointerException on `toString`.
  val simplified = patterns.map { p =>
    p -> Option(p).flatMap(nonNull => simplifyLike(child, nonNull.toString))
  }
  val remainPatterns = simplified.collect { case (p, None) => p }
  val replacements = simplified.collect { case (_, Some(replacement)) => replacement }
  if (replacements.isEmpty) {
    multi
  } else {
    multi match {
      case l: LikeAll => And(replacements.reduceLeft(And), l.copy(patterns = remainPatterns))
      case l: NotLikeAll =>
        And(replacements.map(Not(_)).reduceLeft(And), l.copy(patterns = remainPatterns))
      case l: LikeAny => Or(replacements.reduceLeft(Or), l.copy(patterns = remainPatterns))
      case l: NotLikeAny =>
        Or(replacements.map(Not(_)).reduceLeft(Or), l.copy(patterns = remainPatterns))
    }
  }
}
595+
/**
 * Rewrites LIKE expressions throughout the plan: a `Like` with a literal string pattern is
 * passed to `simplifyLike`, and `LikeAll` / `NotLikeAll` / `LikeAny` / `NotLikeAny` are
 * passed to `simplifyMultiLike`.
 */
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
  // "col LIKE null" is null, so a null pattern folds straight to a null boolean literal.
  case Like(_, Literal(null, StringType), _) =>
    Literal(null, BooleanType)
  // Otherwise keep the original expression when no simplification applies.
  case like @ Like(input, Literal(pattern, StringType), escapeChar) =>
    simplifyLike(input, pattern.toString, escapeChar).getOrElse(like)
  case likeAll @ LikeAll(child, patterns) =>
    simplifyMultiLike(child, patterns, likeAll)
  case notLikeAll @ NotLikeAll(child, patterns) =>
    simplifyMultiLike(child, patterns, notLikeAll)
  case likeAny @ LikeAny(child, patterns) =>
    simplifyMultiLike(child, patterns, likeAny)
  case notLikeAny @ NotLikeAny(child, patterns) =>
    simplifyMultiLike(child, patterns, notLikeAny)
}
576609}
577610
0 commit comments