Skip to content

Commit 40e4db0

Browse files
gatorsmilecloud-fan
authored andcommitted
[SPARK-25402][SQL] Null handling in BooleanSimplification
## What changes were proposed in this pull request? This PR is to fix the null handling in BooleanSimplification. In the rule BooleanSimplification, there are two cases that do not properly handle null values. The optimization is not right if either side is null. This PR is to fix them. ## How was this patch tested? Added test cases Closes #22390 from gatorsmile/fixBooleanSimplification. Authored-by: gatorsmile <gatorsmile@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com> (cherry picked from commit 79cc597) Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 0dbf145 commit 40e4db0

3 files changed

Lines changed: 60 additions & 8 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -263,10 +263,15 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
263263
case TrueLiteral Or _ => TrueLiteral
264264
case _ Or TrueLiteral => TrueLiteral
265265

266-
case a And b if Not(a).semanticEquals(b) => FalseLiteral
267-
case a Or b if Not(a).semanticEquals(b) => TrueLiteral
268-
case a And b if a.semanticEquals(Not(b)) => FalseLiteral
269-
case a Or b if a.semanticEquals(Not(b)) => TrueLiteral
266+
case a And b if Not(a).semanticEquals(b) =>
267+
If(IsNull(a), Literal.create(null, a.dataType), FalseLiteral)
268+
case a And b if a.semanticEquals(Not(b)) =>
269+
If(IsNull(b), Literal.create(null, b.dataType), FalseLiteral)
270+
271+
case a Or b if Not(a).semanticEquals(b) =>
272+
If(IsNull(a), Literal.create(null, a.dataType), TrueLiteral)
273+
case a Or b if a.semanticEquals(Not(b)) =>
274+
If(IsNull(b), Literal.create(null, b.dataType), TrueLiteral)
270275

271276
case a And b if a.semanticEquals(b) => a
272277
case a Or b if a.semanticEquals(b) => a

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest
2727
import org.apache.spark.sql.catalyst.plans.logical._
2828
import org.apache.spark.sql.catalyst.rules._
2929
import org.apache.spark.sql.internal.SQLConf
30+
import org.apache.spark.sql.types.BooleanType
3031

3132
class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
3233

@@ -37,6 +38,7 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
3738
Batch("Constant Folding", FixedPoint(50),
3839
NullPropagation,
3940
ConstantFolding,
41+
SimplifyConditionals,
4042
BooleanSimplification,
4143
PruneFilters) :: Nil
4244
}
@@ -48,6 +50,14 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
4850
testRelation.output, Seq(Row(1, 2, 3, "abc"))
4951
)
5052

53+
val testNotNullableRelation = LocalRelation('a.int.notNull, 'b.int.notNull, 'c.int.notNull,
54+
'd.string.notNull, 'e.boolean.notNull, 'f.boolean.notNull, 'g.boolean.notNull,
55+
'h.boolean.notNull)
56+
57+
val testNotNullableRelationWithData = LocalRelation.fromExternalRows(
58+
testNotNullableRelation.output, Seq(Row(1, 2, 3, "abc"))
59+
)
60+
5161
private def checkCondition(input: Expression, expected: LogicalPlan): Unit = {
5262
val plan = testRelationWithData.where(input).analyze
5363
val actual = Optimize.execute(plan)
@@ -61,6 +71,13 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
6171
comparePlans(actual, correctAnswer)
6272
}
6373

74+
private def checkConditionInNotNullableRelation(
75+
input: Expression, expected: LogicalPlan): Unit = {
76+
val plan = testNotNullableRelationWithData.where(input).analyze
77+
val actual = Optimize.execute(plan)
78+
comparePlans(actual, expected)
79+
}
80+
6481
test("a && a => a") {
6582
checkCondition(Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a)
6683
checkCondition(Literal(1) < 'a && Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a)
@@ -174,10 +191,30 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
174191
}
175192

176193
test("Complementation Laws") {
177-
checkCondition('a && !'a, testRelation)
178-
checkCondition(!'a && 'a, testRelation)
194+
checkConditionInNotNullableRelation('e && !'e, testNotNullableRelation)
195+
checkConditionInNotNullableRelation(!'e && 'e, testNotNullableRelation)
196+
197+
checkConditionInNotNullableRelation('e || !'e, testNotNullableRelationWithData)
198+
checkConditionInNotNullableRelation(!'e || 'e, testNotNullableRelationWithData)
199+
}
200+
201+
test("Complementation Laws - null handling") {
202+
checkCondition('e && !'e,
203+
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze)
204+
checkCondition(!'e && 'e,
205+
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze)
206+
207+
checkCondition('e || !'e,
208+
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze)
209+
checkCondition(!'e || 'e,
210+
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze)
211+
}
212+
213+
test("Complementation Laws - negative case") {
214+
checkCondition('e && !'f, testRelationWithData.where('e && !'f).analyze)
215+
checkCondition(!'f && 'e, testRelationWithData.where(!'f && 'e).analyze)
179216

180-
checkCondition('a || !'a, testRelationWithData)
181-
checkCondition(!'a || 'a, testRelationWithData)
217+
checkCondition('e || !'f, testRelationWithData.where('e || !'f).analyze)
218+
checkCondition(!'f || 'e, testRelationWithData.where(!'f || 'e).analyze)
182219
}
183220
}

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2569,4 +2569,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
25692569
check(lit(2).cast("int"), $"c" === 2, Seq(Row(1, 1, 2, 0), Row(1, 1, 2, 1)))
25702570
check(lit(2).cast("int"), $"c" =!= 2, Seq())
25712571
}
2572+
2573+
test("SPARK-25402 Null handling in BooleanSimplification") {
2574+
val schema = StructType.fromDDL("a boolean, b int")
2575+
val rows = Seq(Row(null, 1))
2576+
2577+
val rdd = sparkContext.parallelize(rows)
2578+
val df = spark.createDataFrame(rdd, schema)
2579+
2580+
checkAnswer(df.where("(NOT a) OR a"), Seq.empty)
2581+
}
25722582
}

0 commit comments

Comments
 (0)