Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,9 @@ case class EqualTo(left: Expression, right: Expression)
// | FALSE | FALSE | TRUE | UNKNOWN |
// | UNKNOWN | UNKNOWN | UNKNOWN | UNKNOWN |
// +---------+---------+---------+---------+
protected override def nullSafeEval(left: Any, right: Any): Any = ordering.equiv(left, right)
protected override def nullSafeEval(left: Any, right: Any): Any = {
left == right || ordering.equiv(left, right)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least, we shoud fix the existing test failures that we found in this PR. But, this fix looks improper, so could we use NormalizeNaNAndZero instead? cc: @cloud-fan @viirya

Copy link
Contributor Author

@tanelk tanelk Aug 23, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NormalizeNaNAndZero can't help here, because checkConsistencyBetweenInterpretedAndCodegen is done without optimizers.

Also it could introduce new correctness issues with atan2(-0.0, x) and 1.0 / -0.0.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assumed that 0.0 == -0.0 is the expected behavior, but if it is not, then we could leave this as it was change the code gen path.

Copy link
Member

@maropu maropu Aug 23, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But, how about the case array(-0.0) == array(0.0)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are 100% correct.

It is an interesting problem, where the same comparator is used for both sorting and equality check.
For sorting -0.0 should be smaller than 0.0, but in equality check they should be equal.

Just for reference, it seems that both hive and mysql consider them equal in the equality check:
https://issues.apache.org/jira/browse/HIVE-11174

}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
defineCodeGen(ctx, ev, (c1, c2) => ctx.genEqual(left.dataType, c1, c2))
Expand Down Expand Up @@ -845,7 +847,7 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp
} else if (input1 == null || input2 == null) {
false
} else {
ordering.equiv(input1, input2)
input1 == input2 || ordering.equiv(input1, input2)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,27 @@ object LiteralGenerator {
lazy val longLiteralGen: Gen[Literal] =
for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType)

// The floatLiteralGen and doubleLiteralGen will 50% of the time yield arbitrary values
// and 50% of the time will yield some special values that are more likely to reveal
// corner cases. This behavior is similar to the integral value generators.
lazy val floatLiteralGen: Gen[Literal] =
for {
f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2,
Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity)
f <- Gen.oneOf(
Gen.oneOf(
Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity, Float.MinPositiveValue,
Float.MaxValue, -Float.MaxValue, 0.0f, -0.0f, 1.0f, -1.0f),
Arbitrary.arbFloat.arbitrary
)
} yield Literal.create(f, FloatType)

lazy val doubleLiteralGen: Gen[Literal] =
for {
f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2,
Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity)
f <- Gen.oneOf(
Gen.oneOf(
Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity, Double.MinPositiveValue,
Double.MaxValue, -Double.MaxValue, 0.0, -0.0, 1.0, -1.0),
Arbitrary.arbDouble.arbitrary
)
} yield Literal.create(f, DoubleType)

// TODO cache the generated data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,13 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(EqualTo(infinity, infinity), true)
}

test("SPARK-32688: 0.0 and -0.0 should be considered equal") {
checkEvaluation(EqualTo(Literal(0.0), Literal(-0.0)), true)
checkEvaluation(EqualNullSafe(Literal(0.0), Literal(-0.0)), true)
checkEvaluation(EqualTo(Literal(0.0f), Literal(-0.0f)), true)
checkEvaluation(EqualNullSafe(Literal(0.0f), Literal(-0.0f)), true)
}

test("SPARK-22693: InSet should not use global variables") {
val ctx = new CodegenContext
InSet(Literal(1), Set(1, 2, 3, 4)).genCode(ctx)
Expand Down