apache · wangyum · May 25, 2017 · May 25, 2017 · May 26, 2017 · May 26, 2017
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1382,8 +1382,8 @@ test_that("column functions", {
   c20 <- to_timestamp(c) + to_timestamp(c, "yyyy") + to_date(c, "yyyy")
   c21 <- posexplode_outer(c) + explode_outer(c)
   c22 <- not(c)
-  c23 <- trunc(c, "year") + trunc(c, "yyyy") + trunc(c, "yy") +
-    trunc(c, "month") + trunc(c, "mon") + trunc(c, "mm")
+  c23 <- trunc(to_date(c), "year") + trunc(to_date(c), "yyyy") + trunc(to_date(c), "yy") +
+    trunc(to_date(c), "month") + trunc(to_date(c), "mon") + trunc(to_date(c), "mm")
 
   # Test if base::is.nan() is exposed
   expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
@@ -1028,20 +1028,28 @@ def to_timestamp(col, format=None):
 
 
 @since(1.5)
-def trunc(date, format):
+def trunc(data, format):
     """
-    Returns date truncated to the unit specified by the format.
+    Returns a date truncated to the unit named by `format`, or a number truncated
+    to `format` decimal places (an int; negative values truncate left of the point).
 
     :param format: 'year', 'YYYY', 'yy' or 'month', 'mon', 'mm'
 
     >>> df = spark.createDataFrame([('1997-02-28',)], ['d'])
-    >>> df.select(trunc(df.d, 'year').alias('year')).collect()
+    >>> df.select(trunc(to_date(df.d), 'year').alias('year')).collect()
     [Row(year=datetime.date(1997, 1, 1))]
-    >>> df.select(trunc(df.d, 'mon').alias('month')).collect()
+    >>> df.select(trunc(to_date(df.d), 'mon').alias('month')).collect()
     [Row(month=datetime.date(1997, 2, 1))]
+    >>> df = spark.createDataFrame([(1234567891.1234567891,)], ['d'])
+    >>> df.select(trunc(df.d, 4).alias('positive')).collect()
+    [Row(positive=1234567891.1234)]
+    >>> df.select(trunc(df.d, -4).alias('negative')).collect()
+    [Row(negative=1234560000.0)]
+    >>> df.select(trunc(df.d, 0).alias('zero')).collect()
+    [Row(zero=1234567891.0)]
     """
     sc = SparkContext._active_spark_context
-    return Column(sc._jvm.functions.trunc(_to_java_column(date), format))
+    return Column(sc._jvm.functions.trunc(_to_java_column(data), format))
 
 
 @since(1.5)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -389,7 +389,6 @@ object FunctionRegistry {
     expression[ParseToDate]("to_date"),
     expression[ToUnixTimestamp]("to_unix_timestamp"),
     expression[ToUTCTimestamp]("to_utc_timestamp"),
-    expression[TruncDate]("trunc"),
     expression[UnixTimestamp]("unix_timestamp"),
     expression[DayOfWeek]("dayofweek"),
     expression[WeekOfYear]("weekofyear"),
@@ -424,6 +423,7 @@ object FunctionRegistry {
     expression[CurrentDatabase]("current_database"),
     expression[CallMethodViaReflection]("reflect"),
     expression[CallMethodViaReflection]("java_method"),
+    expression[Trunc]("trunc"),
 
     // grouping sets
     expression[Cube]("cube"),

diff --git a/...talyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/...talyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1267,86 +1267,6 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.",
-  extended = """
-    Examples:
-      > SELECT _FUNC_('2009-02-12', 'MM');
-       2009-02-01
-      > SELECT _FUNC_('2015-10-27', 'YEAR');
-       2015-01-01
-  """)
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType)
-  override def dataType: DataType = DateType
-  override def nullable: Boolean = true
-  override def prettyName: String = "trunc"
-
-  private lazy val truncLevel: Int =
-    DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
-
-  override def eval(input: InternalRow): Any = {
-    val level = if (format.foldable) {
-      truncLevel
-    } else {
-      DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
-    }
-    if (level == -1) {
-      // unknown format
-      null
-    } else {
-      val d = date.eval(input)
-      if (d == null) {
-        null
-      } else {
-        DateTimeUtils.truncDate(d.asInstanceOf[Int], level)
-      }
-    }
-  }
-
-  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
-
-    if (format.foldable) {
-      if (truncLevel == -1) {
-        ev.copy(code = s"""
-          boolean ${ev.isNull} = true;
-          ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""")
-      } else {
-        val d = date.genCode(ctx)
-        ev.copy(code = s"""
-          ${d.code}
-          boolean ${ev.isNull} = ${d.isNull};
-          ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
-          if (!${ev.isNull}) {
-            ${ev.value} = $dtu.truncDate(${d.value}, $truncLevel);
-          }""")
-      }
-    } else {
-      nullSafeCodeGen(ctx, ev, (dateVal, fmt) => {
-        val form = ctx.freshName("form")
-        s"""
-          int $form = $dtu.parseTruncLevel($fmt);
-          if ($form == -1) {
-            ${ev.isNull} = true;
-          } else {
-            ${ev.value} = $dtu.truncDate($dateVal, $form);
-          }
-        """
-      })
-    }
-  }
-}
-
 /**
  * Returns the number of days from startDate to endDate.
  */

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -21,6 +21,7 @@ import java.util.UUID
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.codegen._
+import org.apache.spark.sql.catalyst.util.{BigDecimalUtils, DateTimeUtils}
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
 
@@ -132,3 +133,154 @@ case class Uuid() extends LeafExpression {
       s"UTF8String.fromString(java.util.UUID.randomUUID().toString());", isNull = "false")
   }
 }
+
+/**
+ * Returns date truncated to the unit specified by the format or
+ * numeric truncated to scale decimal places.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+      _FUNC_(data[, fmt]) - Returns `data` truncated by the format model `fmt`.
+        If `data` is DateType, returns `data` with the time portion of the day truncated to the unit specified by the format model `fmt`.
+        If `data` is DecimalType/DoubleType, returns `data` truncated to `fmt` decimal places.
+  """,
+  extended = """
+    Examples:
+      > SELECT _FUNC_('2009-02-12', 'MM');
+       2009-02-01
+      > SELECT _FUNC_('2015-10-27', 'YEAR');
+       2015-01-01
+      > SELECT _FUNC_('1989-03-13');
+       1989-03-01
+      > SELECT _FUNC_(1234567891.1234567891, 4);
+       1234567891.1234
+      > SELECT _FUNC_(1234567891.1234567891, -4);
+       1234560000
+      > SELECT _FUNC_(1234567891.1234567891);
+       1234567891
+  """)
+// scalastyle:on line.size.limit
+case class Trunc(data: Expression, format: Expression)
+  extends BinaryExpression with ExpectsInputTypes {
+
+  // NOTE(review): reads `data.dataType` at construction; presumably fails on unresolved input.
+  def this(data: Expression) = {
+    this(data, Literal(if (data.dataType.isInstanceOf[DateType]) "MM" else 0))
+  }
+
+  override def left: Expression = data
+  override def right: Expression = format
+
+  override def dataType: DataType = data.dataType
+
+  override def inputTypes: Seq[AbstractDataType] = dataType match {
+    case NullType => Seq(dataType, TypeCollection(StringType, IntegerType))
+    case DateType => Seq(dataType, StringType)
+    case DoubleType | DecimalType.Fixed(_, _) => Seq(dataType, IntegerType)
+    case _ => Seq(TypeCollection(DateType, DoubleType, DecimalType),
+      TypeCollection(StringType, IntegerType))
+  }
+
+  override def nullable: Boolean = true
+
+  override def prettyName: String = "trunc"
+
+  // Lazy: child data types may not be available yet when the expression is constructed.
+  private lazy val isTruncNumber =
+    (dataType.isInstanceOf[DoubleType] || dataType.isInstanceOf[DecimalType]) &&
+      format.dataType.isInstanceOf[IntegerType]
+  private lazy val isTruncDate =
+    dataType.isInstanceOf[DateType] && format.dataType.isInstanceOf[StringType]
+
+  private lazy val truncFormat: Int = if (isTruncNumber) {
+    format.eval().asInstanceOf[Int]
+  } else if (isTruncDate) {
+    DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
+  } else {
+    0
+  }
+
+  override def eval(input: InternalRow): Any = {
+    val d = data.eval(input)
+    // Evaluate against the current row so that a non-foldable (per-row) format is honoured.
+    val form = format.eval(input)
+    if (null == d || null == form) {
+      null
+    } else if (isTruncNumber) {
+      val scale = form.asInstanceOf[Int]
+      data.dataType match {
+        case DoubleType => BigDecimalUtils.trunc(d.asInstanceOf[Double], scale)
+        case DecimalType.Fixed(_, _) =>
+          // Wrap the result back into Decimal, the value class used for DecimalType data.
+          Decimal(BigDecimalUtils.trunc(d.asInstanceOf[Decimal].toJavaBigDecimal, scale))
+      }
+    } else if (isTruncDate) {
+      val level = if (format.foldable) {
+        truncFormat
+      } else {
+        DateTimeUtils.parseTruncLevel(form.asInstanceOf[UTF8String])
+      }
+      if (level == -1) {
+        // unknown format
+        null
+      } else {
+        DateTimeUtils.truncDate(d.asInstanceOf[Int], level)
+      }
+    } else {
+      null
+    }
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    // A result that is null regardless of the input row.
+    def alwaysNull: ExprCode = ev.copy(code = s"""
+      boolean ${ev.isNull} = true;
+      ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
+      """)
+    // Foldable format: only `data` is evaluated per row; `result` builds the value expression.
+    def onData(result: String => String): ExprCode = {
+      val d = data.genCode(ctx)
+      ev.copy(code = s"""
+        ${d.code}
+        boolean ${ev.isNull} = ${d.isNull};
+        ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
+        if (!${ev.isNull}) {
+          ${ev.value} = ${result(d.value)};
+        }""")
+    }
+    if (isTruncNumber) {
+      val bdu = BigDecimalUtils.getClass.getName.stripSuffix("$")
+      // BigDecimalUtils has no Decimal overload, so decimals go through java.math.BigDecimal.
+      def truncCode(value: String, scale: String): String = dataType match {
+        case DoubleType => s"$bdu.trunc($value, $scale)"
+        case _ => s"Decimal.apply($bdu.trunc($value.toJavaBigDecimal(), $scale))"
+      }
+      if (format.foldable) {
+        // A constant null scale must yield null, as eval() does, not truncate to 0 places.
+        if (format.eval() == null) alwaysNull else onData(v => truncCode(v, truncFormat.toString))
+      } else {
+        nullSafeCodeGen(ctx, ev, (num, fmt) => s"${ev.value} = ${truncCode(num, fmt)};")
+      }
+    } else if (isTruncDate) {
+      val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+      if (format.foldable) {
+        if (truncFormat == -1) alwaysNull else onData(v => s"$dtu.truncDate($v, $truncFormat)")
+      } else {
+        nullSafeCodeGen(ctx, ev, (dateVal, fmt) => {
+          val form = ctx.freshName("form")
+          s"""
+            int $form = $dtu.parseTruncLevel($fmt);
+            if ($form == -1) {
+              ${ev.isNull} = true;
+            } else {
+              ${ev.value} = $dtu.truncDate($dateVal, $form);
+            }
+          """
+        })
+      }
+    } else {
+      nullSafeCodeGen(ctx, ev, (dateVal, fmt) => s"${ev.isNull} = true;")
+    }
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BigDecimalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BigDecimalUtils.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util
+
+import java.math.{BigDecimal => JBigDecimal}
+
+/**
+ * Helper functions for truncating numeric values via [[java.math.BigDecimal]].
+ */
+object BigDecimalUtils {
+
+  /**
+   * Returns `input` truncated (rounded toward zero) to `scale` decimal places.
+   * A negative `scale` zeroes digits to the left of the decimal point.
+   * NaN and infinite inputs have no BigDecimal form and are returned unchanged.
+   */
+  def trunc(input: Double, scale: Int): Double = {
+    if (input.isNaN || input.isInfinite) {
+      input
+    } else {
+      trunc(JBigDecimal.valueOf(input), scale).doubleValue()
+    }
+  }
+
+  /**
+   * Returns `input` truncated (rounded toward zero) to `scale` decimal places.
+   * A negative `scale` zeroes digits to the left of the decimal point.
+   *
+   * Works on the decimal representation directly instead of round-tripping through
+   * `longValue()`, which silently wraps once the shifted value exceeds the Long range.
+   */
+  def trunc(input: JBigDecimal, scale: Int): JBigDecimal = {
+    // Nothing to drop when the value already has no more than `scale` fraction digits.
+    val truncated = if (input.scale <= scale) {
+      input
+    } else {
+      input.setScale(scale, java.math.RoundingMode.DOWN)
+    }
+    // Avoid handing callers a negative scale (e.g. 1.23456E+9); rescaling to 0 is exact.
+    if (truncated.scale < 0) truncated.setScale(0) else truncated
+  }
+}
diff --git a/...alyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/...alyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -527,27 +527,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null, StringType)), null)
   }
 
-  test("function trunc") {
-    def testTrunc(input: Date, fmt: String, expected: Date): Unit = {
-      checkEvaluation(TruncDate(Literal.create(input, DateType), Literal.create(fmt, StringType)),
-        expected)
-      checkEvaluation(
-        TruncDate(Literal.create(input, DateType), NonFoldableLiteral.create(fmt, StringType)),
-        expected)
-    }
-    val date = Date.valueOf("2015-07-22")
-    Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach { fmt =>
-      testTrunc(date, fmt, Date.valueOf("2015-01-01"))
-    }
-    Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach { fmt =>
-      testTrunc(date, fmt, Date.valueOf("2015-07-01"))
-    }
-    testTrunc(date, "DD", null)
-    testTrunc(date, null, null)
-    testTrunc(null, "MON", null)
-    testTrunc(null, null, null)
-  }
-
   test("from_unixtime") {
     val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US)
     val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS"