-
Notifications
You must be signed in to change notification settings. Fork 29k
[SPARK-17854][SQL] rand/randn allows null/long as input seed #15432
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
5213bd6
182548b
2453460
fec5f42
7bc0a19
9c56094
30179d8
3283d3a
a523302
b432355
160ea54
9b9a49f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,11 +17,10 @@ | |
|
|
||
| package org.apache.spark.sql.catalyst.expressions | ||
|
|
||
| import org.apache.spark.TaskContext | ||
| import org.apache.spark.sql.AnalysisException | ||
| import org.apache.spark.sql.catalyst.InternalRow | ||
| import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} | ||
| import org.apache.spark.sql.types.{DataType, DoubleType} | ||
| import org.apache.spark.sql.types._ | ||
| import org.apache.spark.util.Utils | ||
| import org.apache.spark.util.random.XORShiftRandom | ||
|
|
||
|
|
@@ -32,10 +31,7 @@ import org.apache.spark.util.random.XORShiftRandom | |
| * | ||
| * Since this expression is stateful, it cannot be a case object. | ||
| */ | ||
| abstract class RDG extends LeafExpression with Nondeterministic { | ||
|
|
||
| protected def seed: Long | ||
|
|
||
| abstract class RDG extends UnaryExpression with ExpectsInputTypes with Nondeterministic { | ||
| /** | ||
| * Record ID within each partition. By being transient, the Random Number Generator is | ||
| * reset every time we serialize and deserialize and initialize it. | ||
|
|
@@ -46,12 +42,18 @@ abstract class RDG extends LeafExpression with Nondeterministic { | |
| rng = new XORShiftRandom(seed + partitionIndex) | ||
| } | ||
|
|
||
| @transient protected lazy val seed: Long = child match { | ||
| case Literal(s, IntegerType) => s.asInstanceOf[Int] | ||
| case Literal(s, LongType) => s.asInstanceOf[Long] | ||
| case _ => throw new AnalysisException( | ||
| s"Input argument to $prettyName must be an integer, long or null literal.") | ||
| } | ||
|
|
||
| override def nullable: Boolean = false | ||
|
|
||
| override def dataType: DataType = DoubleType | ||
|
|
||
| // NOTE: Even if the user doesn't provide a seed, Spark SQL adds a default seed. | ||
| override def sql: String = s"$prettyName($seed)" | ||
| override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(IntegerType, LongType)) | ||
| } | ||
|
|
||
| /** Generate a random column with i.i.d. uniformly distributed values in [0, 1). */ | ||
|
|
@@ -64,17 +66,15 @@ abstract class RDG extends LeafExpression with Nondeterministic { | |
| 0.9629742951434543 | ||
| > SELECT _FUNC_(0); | ||
| 0.8446490682263027 | ||
| > SELECT _FUNC_(null); | ||
| 0.8446490682263027 | ||
| """) | ||
| // scalastyle:on line.size.limit | ||
| case class Rand(seed: Long) extends RDG { | ||
| override protected def evalInternal(input: InternalRow): Double = rng.nextDouble() | ||
| case class Rand(child: Expression) extends RDG { | ||
|
|
||
| def this() = this(Utils.random.nextLong()) | ||
| def this() = this(Literal(Utils.random.nextLong())) | ||
|
|
||
| def this(seed: Expression) = this(seed match { | ||
| case IntegerLiteral(s) => s | ||
| case _ => throw new AnalysisException("Input argument to rand must be an integer literal.") | ||
| }) | ||
| override protected def evalInternal(input: InternalRow): Double = rng.nextDouble() | ||
|
|
||
| override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
| val rngTerm = ctx.freshName("rng") | ||
|
|
@@ -87,6 +87,10 @@ case class Rand(seed: Long) extends RDG { | |
| } | ||
| } | ||
|
|
||
| object Rand { | ||
| def apply(seed: Long): Rand = Rand(Literal(seed)) | ||
|
||
| } | ||
|
|
||
| /** Generate a random column with i.i.d. values drawn from the standard normal distribution. */ | ||
| // scalastyle:off line.size.limit | ||
| @ExpressionDescription( | ||
|
|
@@ -97,17 +101,15 @@ case class Rand(seed: Long) extends RDG { | |
| -0.3254147983080288 | ||
| > SELECT _FUNC_(0); | ||
| 1.1164209726833079 | ||
| > SELECT _FUNC_(null); | ||
| 1.1164209726833079 | ||
| """) | ||
| // scalastyle:on line.size.limit | ||
| case class Randn(seed: Long) extends RDG { | ||
| override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian() | ||
| case class Randn(child: Expression) extends RDG { | ||
|
|
||
| def this() = this(Utils.random.nextLong()) | ||
| def this() = this(Literal(Utils.random.nextLong())) | ||
|
||
|
|
||
| def this(seed: Expression) = this(seed match { | ||
| case IntegerLiteral(s) => s | ||
| case _ => throw new AnalysisException("Input argument to randn must be an integer literal.") | ||
| }) | ||
| override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian() | ||
|
|
||
| override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
| val rngTerm = ctx.freshName("rng") | ||
|
|
@@ -119,3 +121,7 @@ case class Randn(seed: Long) extends RDG { | |
| final ${ctx.javaType(dataType)} ${ev.value} = $rngTerm.nextGaussian();""", isNull = "false") | ||
| } | ||
| } | ||
|
|
||
| object Randn { | ||
| def apply(seed: Long): Randn = Randn(Literal(seed)) | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1728,4 +1728,29 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { | |
| val df = spark.createDataFrame(spark.sparkContext.makeRDD(rows), schema) | ||
| assert(df.filter($"array1" === $"array2").count() == 1) | ||
| } | ||
|
|
||
| test("SPARK-17854: rand/randn allows null and long as input seed") { | ||
|
||
| checkAnswer(testData.selectExpr("rand(NULL)"), testData.selectExpr("rand(0)")) | ||
| checkAnswer(testData.selectExpr("rand(0L)"), testData.selectExpr("rand(0)")) | ||
| checkAnswer(testData.selectExpr("randn(NULL)"), testData.selectExpr("randn(0)")) | ||
| checkAnswer(testData.selectExpr("randn(0L)"), testData.selectExpr("randn(0)")) | ||
| checkAnswer(testData.selectExpr("rand(cast(NULL AS INT))"), testData.selectExpr("rand(0)")) | ||
| checkAnswer(testData.selectExpr("rand(cast(3 / 7 AS INT))"), testData.selectExpr("rand(0)")) | ||
| checkAnswer( | ||
| testData.selectExpr("randn(cast(NULL AS LONG))"), testData.selectExpr("randn(0L)")) | ||
| checkAnswer( | ||
| testData.selectExpr("randn(cast(3L / 12L AS LONG))"), testData.selectExpr("randn(0L)")) | ||
|
|
||
| val eOne = intercept[AnalysisException] { | ||
| testData.selectExpr("rand(key)").collect() | ||
| } | ||
| assert( | ||
| eOne.message.contains("Input argument to rand must be an integer, long or null literal.")) | ||
|
|
||
| val eTwo = intercept[AnalysisException] { | ||
| testData.selectExpr("randn(key)").collect() | ||
| } | ||
| assert( | ||
| eTwo.message.contains("Input argument to randn must be an integer, long or null literal.")) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The same here?