Closed

Commits (29 total; diff shows changes from 26 commits)
5c662f6
[SPARK-24598][SQL] Overflow on airthmetic operation returns incorrect…
mgaido91 Jun 20, 2018
fad75fa
fix scalastyle
mgaido91 Jun 20, 2018
8591417
fix ut failures
mgaido91 Jun 20, 2018
9c3df7d
use larger intermediate buffer for sum
mgaido91 Jun 21, 2018
ebdaf61
fix UT error
mgaido91 Jun 22, 2018
a0b862e
allow precision loss when converting decimal to long
mgaido91 Jun 22, 2018
7bba22f
Merge branch 'master' into SPARK-24598
mgaido91 Jul 16, 2018
77f26f2
Merge branch 'master' of github.com:apache/spark into SPARK-24598
mgaido91 Jun 21, 2019
74cd0a4
Handle NaN
mgaido91 Jun 22, 2019
2cfd946
Add conf flag for checking overflow
mgaido91 Jun 26, 2019
25c853c
fix
mgaido91 Jun 26, 2019
ff02dca
Merge branch 'master' of github.com:apache/spark into SPARK-24598
mgaido91 Jun 27, 2019
00fae1d
fix tests
mgaido91 Jun 27, 2019
8e9715c
change default value and fix tests
mgaido91 Jun 28, 2019
1dff779
Merge branch 'master' into SPARK-24598
mgaido91 Jul 14, 2019
38fc1f4
fix typo
mgaido91 Jul 15, 2019
0d5e510
Merge branch 'SPARK-24598' of github.com:mgaido91/spark into SPARK-24598
mgaido91 Jul 15, 2019
37e19ce
fix
mgaido91 Jul 15, 2019
eb37ee7
Merge branch 'master' of github.com:apache/spark into SPARK-24598
mgaido91 Jul 20, 2019
98bbf83
address comments
mgaido91 Jul 20, 2019
650ea79
fix
mgaido91 Jul 20, 2019
1d20f73
address comments
mgaido91 Jul 26, 2019
538e332
address comments
mgaido91 Jul 26, 2019
3de4bfb
fix
mgaido91 Jul 27, 2019
3baecbc
fixes
mgaido91 Jul 27, 2019
a247f9f
fix unaryminus
mgaido91 Jul 27, 2019
582d148
address comments
mgaido91 Jul 30, 2019
b809a3f
fix
mgaido91 Jul 30, 2019
ce3ed2b
address comments
mgaido91 Jul 31, 2019
@@ -56,7 +56,10 @@ case class Sum(child: Expression) extends DeclarativeAggregate with ImplicitCast
case _ => DoubleType
}

private lazy val sumDataType = resultType
private lazy val sumDataType = child.dataType match {
case LongType => DecimalType.BigIntDecimal
Contributor:
This reduces the possibility of overflow, but do other databases have the same behavior? Changing the data type of an expression is a breaking change.

Contributor Author:
This is not changing the result data type of the expression; it only changes the internal buffer type, in order to let "temporary" overflows happen without any exception. Please consider the case when you have:

Long.MaxValue
100
-1000

The result should be Long.MaxValue - 900. With a buffer type larger than the returned type, we can overflow temporarily when we add Long.MaxValue and 100, and then get back to a valid value when we add -1000. So with this change we return the correct value. Other DBs behave this way too.
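
A minimal standalone sketch of the point above (illustration only, not PR code): with an overflow-checked Long accumulator the intermediate addition throws, while a wider decimal buffer tolerates the temporary overflow and still fits back into a Long at the end.

```scala
import scala.util.Try

// Illustration only (not PR code): overflow-checked Long buffer vs. a wider
// decimal buffer when summing Long.MaxValue, 100 and -1000.
object SumBufferSketch {
  def main(args: Array[String]): Unit = {
    val values = Seq(Long.MaxValue, 100L, -1000L)

    // Exact Long buffer: Math.addExact throws when adding 100 to
    // Long.MaxValue, even though the final sum fits in a Long.
    println(Try(values.foldLeft(0L)((acc, v) => Math.addExact(acc, v))))
    // Failure(java.lang.ArithmeticException: long overflow)

    // Wider buffer: no intermediate overflow, and the result
    // (Long.MaxValue - 900) converts back to a Long without loss.
    val sum = values.foldLeft(BigDecimal(0))((acc, v) => acc + BigDecimal(v))
    println(sum.toLong) // 9223372036854774907
  }
}
```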

case _ => resultType
}

private lazy val sum = AttributeReference("sum", sumDataType)()

@@ -89,5 +92,11 @@ case class Sum(child: Expression) extends DeclarativeAggregate with ImplicitCast
)
}

override lazy val evaluateExpression: Expression = sum
override lazy val evaluateExpression: Expression = {
if (sumDataType == resultType) {
sum
} else {
Cast(sum, resultType)
Contributor:
After some more thought, I think we should still use long as the buffer type to sum longs, at least by default. Adding long values is faster than adding decimal values, and we shouldn't introduce this performance regression silently.

We can have an option to use decimal as buffer type, to reduce the possibility of overflow. But it should be opt-in.

Contributor Author:
I see. I agree. Shall we revert this change here and do this in another PR?

}
}
}
@@ -35,17 +35,36 @@ import org.apache.spark.unsafe.types.CalendarInterval
""")
case class UnaryMinus(child: Expression) extends UnaryExpression
with ExpectsInputTypes with NullIntolerant {
private val checkOverflow = SQLConf.get.arithmeticOperationOverflowCheck

override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection.NumericAndInterval)

override def dataType: DataType = child.dataType

override def toString: String = s"-$child"

private lazy val numeric = TypeUtils.getNumeric(dataType)
private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow)

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = dataType match {
case _: DecimalType => defineCodeGen(ctx, ev, c => s"$c.unary_$$minus()")
case ByteType | ShortType if checkOverflow =>
nullSafeCodeGen(ctx, ev, eval => {
val javaBoxedType = CodeGenerator.boxedType(dataType)
val javaType = CodeGenerator.javaType(dataType)
val originValue = ctx.freshName("origin")
s"""
|$javaType $originValue = ($javaType)($eval);
|if ($originValue == $javaBoxedType.MIN_VALUE) {
| throw new ArithmeticException("- " + $originValue + " caused overflow.");
|}
|${ev.value} = ($javaType)(-($originValue));
""".stripMargin
})
case IntegerType | LongType if checkOverflow =>
nullSafeCodeGen(ctx, ev, eval => {
val mathClass = classOf[Math].getName
s"${ev.value} = $mathClass.negateExact(-($eval));"
Contributor:
wait, shouldn't it be $mathClass.negateExact($eval)?

Contributor Author:
mmmh... it should indeed... I am wondering how the test cases could pass...
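
For reference, a small standalone sketch (not PR code) of what the generated expression computes, which may be why the existing tests did not catch it: for ordinary values the two negations cancel out, and only Long.MinValue still trips the check.

```scala
// Illustration only (not PR code): negateExact(-(x)) is effectively a no-op
// for every value except MIN_VALUE, so the generated code would return x
// instead of -x when overflow checking is enabled.
object NegateExactSketch {
  def main(args: Array[String]): Unit = {
    val x = 42L
    println(Math.negateExact(-x)) // 42, not -42: the double negation cancels
    println(Math.negateExact(x))  // -42: the intended result

    // Long.MinValue: the inner unary minus silently wraps back to
    // Long.MinValue, and negateExact then throws, so the overflow
    // test case still passes.
    try Math.negateExact(-Long.MinValue)
    catch { case e: ArithmeticException => println(e.getMessage) } // long overflow
  }
}
```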

})
case dt: NumericType => nullSafeCodeGen(ctx, ev, eval => {
val originValue = ctx.freshName("origin")
// codegen would fail to compile if we just write (-($c))
@@ -117,6 +136,8 @@ case class Abs(child: Expression)

abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant {

protected val checkOverflow = SQLConf.get.arithmeticOperationOverflowCheck

override def dataType: DataType = left.dataType

override lazy val resolved: Boolean = childrenResolved && checkInputDataTypes().isSuccess
@@ -129,17 +150,57 @@ abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant {
def calendarIntervalMethod: String =
sys.error("BinaryArithmetics must override either calendarIntervalMethod or genCode")

/** Name of the function for the exact version of this expression in [[Math]]. */
def exactMathMethod: String =
sys.error("BinaryArithmetics must override either exactMathMethod or genCode")

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = dataType match {
case _: DecimalType =>
// Overflow is handled in the CheckOverflow operator
defineCodeGen(ctx, ev, (eval1, eval2) => s"$eval1.$decimalMethod($eval2)")
case CalendarIntervalType =>
defineCodeGen(ctx, ev, (eval1, eval2) => s"$eval1.$calendarIntervalMethod($eval2)")
// byte and short are casted into int when add, minus, times or divide
case ByteType | ShortType =>
defineCodeGen(ctx, ev,
(eval1, eval2) => s"(${CodeGenerator.javaType(dataType)})($eval1 $symbol $eval2)")
case _ =>
defineCodeGen(ctx, ev, (eval1, eval2) => s"$eval1 $symbol $eval2")
nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
val tmpResult = ctx.freshName("tmpResult")
val overflowCheck = if (checkOverflow) {
val javaType = CodeGenerator.boxedType(dataType)
s"""
|if ($tmpResult < $javaType.MIN_VALUE || $tmpResult > $javaType.MAX_VALUE) {
| throw new ArithmeticException($eval1 + " $symbol " + $eval2 + " caused overflow.");
|}
""".stripMargin
} else {
""
}
s"""
|${CodeGenerator.JAVA_INT} $tmpResult = $eval1 $symbol $eval2;
|$overflowCheck
|${ev.value} = (${CodeGenerator.javaType(dataType)})($tmpResult);
""".stripMargin
})
case IntegerType | LongType =>
nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
val operation = if (checkOverflow) {
val mathClass = classOf[Math].getName
s"$mathClass.$exactMathMethod($eval1, $eval2)"
} else {
s"$eval1 $symbol $eval2"
}
s"""
|${ev.value} = $operation;
""".stripMargin
})
case DoubleType | FloatType =>
// When Double/Float overflows, there can be 2 cases:
// - precision loss: according to SQL standard, the number is truncated;
// - returns (+/-)Infinite: same behavior also other DBs have (eg. Postgres)
nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
s"""
|${ev.value} = $eval1 $symbol $eval2;
""".stripMargin
})
}
}
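
For reference, a standalone sketch (not PR code) of the two Double cases described in the comment above: silent precision loss and saturation to Infinity.

```scala
// Illustration only (not PR code): Double arithmetic never throws on
// overflow; it either loses precision or saturates to +/-Infinity.
object DoubleOverflowSketch {
  def main(args: Array[String]): Unit = {
    // Precision loss: the small addend is truncated away.
    println(1e18 + 1 == 1e18) // true

    // Exceeding the representable range yields Infinity, as e.g. Postgres
    // also does for double precision columns.
    println(Double.MaxValue + Double.MaxValue) // Infinity
  }
}
```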

@@ -164,7 +225,7 @@ case class Add(left: Expression, right: Expression) extends BinaryArithmetic {

override def calendarIntervalMethod: String = "add"

private lazy val numeric = TypeUtils.getNumeric(dataType)
private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow)

protected override def nullSafeEval(input1: Any, input2: Any): Any = {
if (dataType.isInstanceOf[CalendarIntervalType]) {
@@ -173,6 +234,8 @@ case class Add(left: Expression, right: Expression) extends BinaryArithmetic {
numeric.plus(input1, input2)
}
}

override def exactMathMethod: String = "addExact"
}

@ExpressionDescription(
@@ -192,7 +255,7 @@ case class Subtract(left: Expression, right: Expression) extends BinaryArithmeti

override def calendarIntervalMethod: String = "subtract"

private lazy val numeric = TypeUtils.getNumeric(dataType)
private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow)

protected override def nullSafeEval(input1: Any, input2: Any): Any = {
if (dataType.isInstanceOf[CalendarIntervalType]) {
@@ -201,6 +264,8 @@ case class Subtract(left: Expression, right: Expression) extends BinaryArithmeti
numeric.minus(input1, input2)
}
}

override def exactMathMethod: String = "subtractExact"
}

@ExpressionDescription(
@@ -217,9 +282,11 @@ case class Multiply(left: Expression, right: Expression) extends BinaryArithmeti
override def symbol: String = "*"
override def decimalMethod: String = "$times"

private lazy val numeric = TypeUtils.getNumeric(dataType)
private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow)

protected override def nullSafeEval(input1: Any, input2: Any): Any = numeric.times(input1, input2)

override def exactMathMethod: String = "multiplyExact"
}

// Common base trait for Divide and Remainder, since these two classes are almost identical
@@ -60,8 +60,13 @@
}
}

def getNumeric(t: DataType): Numeric[Any] =
t.asInstanceOf[NumericType].numeric.asInstanceOf[Numeric[Any]]
def getNumeric(t: DataType, exactNumericRequired: Boolean = false): Numeric[Any] = {
if (exactNumericRequired) {
t.asInstanceOf[NumericType].exactNumeric.asInstanceOf[Numeric[Any]]
} else {
t.asInstanceOf[NumericType].numeric.asInstanceOf[Numeric[Any]]
}
}

def getInterpretedOrdering(t: DataType): Ordering[Any] = {
t match {
@@ -1780,6 +1780,15 @@ object SQLConf {
.booleanConf
.createWithDefault(false)

val ARITHMETIC_OPERATION_OVERFLOW_CHECK =
Contributor:
shall we rename it to be consistent with the config name spark.sql.arithmeticOperations.failOnOverFlow?
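
For context, a usage sketch of the flag under discussion (illustration only; it assumes an existing SparkSession named spark and the config key shown in the diff below):

```scala
// Usage sketch (assumes an existing SparkSession named `spark` and the
// config key from this diff): opt in to failing on overflow.
spark.conf.set("spark.sql.arithmeticOperations.failOnOverFlow", "true")

// With the flag enabled, a long addition that overflows should throw an
// ArithmeticException instead of silently wrapping around.
spark.sql("SELECT CAST(9223372036854775807 AS BIGINT) + CAST(1 AS BIGINT)").show()
```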

buildConf("spark.sql.arithmeticOperations.failOnOverFlow")
.doc("If it is set to true, all arithmetic operations on non-decimal fields throw an " +
"exception if an overflow occurs. If it is false (default), in case of overflow a wrong " +
"result is returned.")
.internal()
.booleanConf
.createWithDefault(false)

val LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE =
buildConf("spark.sql.legacy.parser.havingWithoutGroupByAsWhere")
.internal()
@@ -2287,6 +2296,8 @@ class SQLConf extends Serializable with Logging {

def decimalOperationsNullOnOverflow: Boolean = getConf(DECIMAL_OPERATIONS_NULL_ON_OVERFLOW)

def arithmeticOperationOverflowCheck: Boolean = getConf(ARITHMETIC_OPERATION_OVERFLOW_CHECK)

def literalPickMinimumPrecision: Boolean = getConf(LITERAL_PICK_MINIMUM_PRECISION)

def continuousStreamingEpochBacklogQueueSize: Int =
@@ -142,6 +142,8 @@ abstract class NumericType extends AtomicType {
// desugared by the compiler into an argument to the objects constructor. This means there is no
// longer a no argument constructor and thus the JVM cannot serialize the object anymore.
private[sql] val numeric: Numeric[InternalType]

private[sql] def exactNumeric: Numeric[InternalType] = numeric
}
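
A sketch of what an "exact" Numeric behind the exactNumeric hook above could look like (an assumption for illustration; the PR's actual LongExactNumeric may differ), delegating to java.lang.Math's *Exact methods so that overflow throws instead of wrapping:

```scala
import scala.util.Try

// Hypothetical sketch, not the PR's implementation: a Numeric[Long] whose
// arithmetic throws ArithmeticException on overflow.
object LongExactNumericSketch extends Numeric[Long] {
  def plus(x: Long, y: Long): Long = Math.addExact(x, y)
  def minus(x: Long, y: Long): Long = Math.subtractExact(x, y)
  def times(x: Long, y: Long): Long = Math.multiplyExact(x, y)
  def negate(x: Long): Long = Math.negateExact(x)
  def fromInt(x: Int): Long = x.toLong
  def toInt(x: Long): Int = x.toInt
  def toLong(x: Long): Long = x
  def toFloat(x: Long): Float = x.toFloat
  def toDouble(x: Long): Double = x.toDouble
  def compare(x: Long, y: Long): Int = java.lang.Long.compare(x, y)
  // Required on Scala 2.13; a harmless extra member on 2.12.
  def parseString(str: String): Option[Long] = Try(str.toLong).toOption
}
```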


@@ -37,6 +37,7 @@ class ByteType private() extends IntegralType {
private[sql] val numeric = implicitly[Numeric[Byte]]
private[sql] val integral = implicitly[Integral[Byte]]
private[sql] val ordering = implicitly[Ordering[InternalType]]
override private[sql] val exactNumeric = ByteExactNumeric

/**
* The default size of a value of the ByteType is 1 byte.
@@ -22,6 +22,7 @@ import java.math.{BigInteger, MathContext, RoundingMode}

import org.apache.spark.annotation.Unstable
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.internal.SQLConf

/**
* A mutable implementation of BigDecimal that can hold a Long if values are small enough.
@@ -228,6 +229,12 @@ final class Decimal extends Ordered[Decimal] with Serializable {
if (decimalVal.eq(null)) {
longVal / POW_10(_scale)
} else {
if (SQLConf.get.arithmeticOperationOverflowCheck) {
Contributor:
shall we use the DECIMAL_OPERATIONS_NULL_ON_OVERFLOW config instead?

Contributor:
BTW we can do it in another PR; let's focus on non-decimal arithmetic operations in this PR.

Contributor:
And we need to handle more cases as well, e.g. long to int, int to byte, etc. AFAIK @gengliangwang has been working on cast recently.

Contributor Author:
IIRC, I had to do this in order to have some test cases work; it is related to the sum buffer. Honestly, I'd consider it quite challenging to decide which of these 2 configs is the right one... It may be quite counter-intuitive for a user that a sum of longs can be affected by DECIMAL_OPERATIONS_NULL_ON_OVERFLOW...

Contributor:
"I had to do this in order to have some test cases work."

Not anymore if we revert #21599 (comment)?

Contributor Author:
Yes, exactly. We can discuss this when we do the PR addressing the sum buffer data type.
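
For reference, a standalone sketch (not PR code) of the silent truncation that the range check below guards against:

```scala
import java.math.BigDecimal

// Illustration only (not PR code): without a range check, longValue() on a
// java.math.BigDecimal outside the Long range silently keeps only the low
// 64 bits, while longValueExact() throws.
object DecimalToLongSketch {
  def main(args: Array[String]): Unit = {
    val tooBig = new BigDecimal("9223372036854775808") // Long.MaxValue + 1
    println(tooBig.longValue()) // -9223372036854775808: silent wrap-around

    try tooBig.longValueExact()
    catch { case e: ArithmeticException => println(e.getMessage) } // reports the overflow
  }
}
```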

// This will throw an exception if overflow occurs
if (decimalVal.compare(LONG_MIN_BIG_DEC) < 0 || decimalVal.compare(LONG_MAX_BIG_DEC) > 0) {
throw new ArithmeticException("Overflow")
}
}
decimalVal.longValue()
}
}
@@ -456,6 +463,9 @@ object Decimal {
private val LONG_MAX_BIG_INT = BigInteger.valueOf(JLong.MAX_VALUE)
private val LONG_MIN_BIG_INT = BigInteger.valueOf(JLong.MIN_VALUE)

private val LONG_MAX_BIG_DEC = BigDecimal.valueOf(JLong.MAX_VALUE)
private val LONG_MIN_BIG_DEC = BigDecimal.valueOf(JLong.MIN_VALUE)

def apply(value: Double): Decimal = new Decimal().set(value)

def apply(value: Long): Decimal = new Decimal().set(value)
@@ -37,6 +37,7 @@ class IntegerType private() extends IntegralType {
private[sql] val numeric = implicitly[Numeric[Int]]
private[sql] val integral = implicitly[Integral[Int]]
private[sql] val ordering = implicitly[Ordering[InternalType]]
override private[sql] val exactNumeric = IntegerExactNumeric

/**
* The default size of a value of the IntegerType is 4 bytes.
@@ -37,6 +37,7 @@ class LongType private() extends IntegralType {
private[sql] val numeric = implicitly[Numeric[Long]]
private[sql] val integral = implicitly[Integral[Long]]
private[sql] val ordering = implicitly[Ordering[InternalType]]
override private[sql] val exactNumeric = LongExactNumeric

/**
* The default size of a value of the LongType is 8 bytes.
@@ -37,6 +37,7 @@ class ShortType private() extends IntegralType {
private[sql] val numeric = implicitly[Numeric[Short]]
private[sql] val integral = implicitly[Integral[Short]]
private[sql] val ordering = implicitly[Ordering[InternalType]]
override private[sql] val exactNumeric = ShortExactNumeric

/**
* The default size of a value of the ShortType is 2 bytes.