apache · mtustin-handy · Mar 26, 2016 · Mar 26, 2016 · Mar 30, 2016 · Apr 1, 2016
diff --git a/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala b/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala
@@ -21,5 +21,20 @@ package org.apache.spark.partial
  * A Double value with error bars and associated confidence.
  */
 class BoundedDouble(val mean: Double, val confidence: Double, val low: Double, val high: Double) {
-  override def toString(): String = "[%.3f, %.3f]".format(low, high)
+  override def toString(): String = "BoundedDouble(%.3f, %.3f, %.3f, %.3f)".format(mean, confidence, low, high)
+
+  override def hashCode:Int = this.mean.hashCode ^ this.confidence.hashCode ^ this.low.hashCode ^ this.high.hashCode
+
+  override def equals(that: Any): Boolean =
+        that match {
+          case that: BoundedDouble => {
+            this.mean == that.mean &&
+            this.confidence == that.confidence &&
+            this.low == that.low &&
+            this.high == that.high
+          }
+          case _ => false
+        }
+
+
 }
diff --git a/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala
@@ -29,8 +29,9 @@ import org.apache.spark.util.StatCounter
 private[spark] class SumEvaluator(totalOutputs: Int, confidence: Double)
   extends ApproximateEvaluator[StatCounter, BoundedDouble] {
 
+  // modified in merge
   var outputsMerged = 0
-  var counter = new StatCounter
+  val counter = new StatCounter
 
   override def merge(outputId: Int, taskResult: StatCounter) {
     outputsMerged += 1
@@ -40,30 +41,39 @@ private[spark] class SumEvaluator(totalOutputs: Int, confidence: Double)
   override def currentResult(): BoundedDouble = {
     if (outputsMerged == totalOutputs) {
       new BoundedDouble(counter.sum, 1.0, counter.sum, counter.sum)
-    } else if (outputsMerged == 0) {
+    } else if (outputsMerged == 0 || counter.count == 0) {
       new BoundedDouble(0, 0.0, Double.NegativeInfinity, Double.PositiveInfinity)
     } else {
       val p = outputsMerged.toDouble / totalOutputs
       val meanEstimate = counter.mean
-      val meanVar = counter.sampleVariance / counter.count
       val countEstimate = (counter.count + 1 - p) / p
-      val countVar = (counter.count + 1) * (1 - p) / (p * p)
       val sumEstimate = meanEstimate * countEstimate
-      val sumVar = (meanEstimate * meanEstimate * countVar) +
-                   (countEstimate * countEstimate * meanVar) +
-                   (meanVar * countVar)
-      val sumStdev = math.sqrt(sumVar)
-      val confFactor = {
-        if (counter.count > 100) {
+
+      val meanVar = counter.sampleVariance / counter.count
+
+      // branch at this point because counter.count == 1 implies counter.sampleVariance == Nan
+      // and we don't want to ever return a bound of NaN
+      if (meanVar == Double.NaN || counter.count == 1) {
+        new BoundedDouble(sumEstimate, confidence, Double.NegativeInfinity, Double.PositiveInfinity)
+      } else {
+        val countVar = (counter.count + 1) * (1 - p) / (p * p)
+        val sumVar = (meanEstimate * meanEstimate * countVar) +
+          (countEstimate * countEstimate * meanVar) +
+          (meanVar * countVar)
+        val sumStdev = math.sqrt(sumVar)
+        val confFactor = if (counter.count > 100) {
           new NormalDistribution().inverseCumulativeProbability(1 - (1 - confidence) / 2)
-        } else {
+        } else if (counter.count > 1) {
           val degreesOfFreedom = (counter.count - 1).toInt
           new TDistribution(degreesOfFreedom).inverseCumulativeProbability(1 - (1 - confidence) / 2)
+        } else {
+          throw new Exception("Counter.count <= 1; this should be impossible at this point")
         }
+
+        val low = sumEstimate - confFactor * sumStdev
+        val high = sumEstimate + confFactor * sumStdev
+        new BoundedDouble(sumEstimate, confidence, low, high)
       }
-      val low = sumEstimate - confFactor * sumStdev
-      val high = sumEstimate + confFactor * sumStdev
-      new BoundedDouble(sumEstimate, confidence, low, high)
     }
   }
 }
diff --git a/core/src/test/scala/org/apache/spark/partial/SumEvaluatorSuite.scala b/core/src/test/scala/org/apache/spark/partial/SumEvaluatorSuite.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.partial
+
+
+import org.apache.spark._
+import org.apache.spark.util.StatCounter
+
+class SumEvaluatorSuite extends SparkFunSuite with SharedSparkContext {
+
+  test("correct handling of count 1") {
+
+    //setup
+    val counter = new StatCounter(List(2.0))
+    // count of 10 because it's larger than 1,
+    // and 0.95 because that's the default
+    val evaluator = new SumEvaluator(10, 0.95)
+    // arbitrarily assign id 1
+    evaluator.merge(1, counter)
+
+    //execute
+    val res = evaluator.currentResult()
+    // Build version with known precisions for equality check
+    val round_res = new BoundedDouble(res.mean.round.toDouble, res.confidence, res.low, res.high)
+
+    //Sanity check that equality works on BoundedDouble
+    assert(new BoundedDouble(2.0, 0.95, 1.1, 1.2) == new BoundedDouble(2.0, 0.95, 1.1, 1.2))
+    // actual test
+
+    // 38.0 because that's how the maths shakes out
+    assert(round_res == new BoundedDouble(38.0, 0.950, Double.NegativeInfinity, Double.PositiveInfinity))
+  }
+
+}