@@ -63,26 +63,24 @@ class DataFrameStatSuite extends QueryTest with SharedSQLContext {
   }
 
   test("randomSplit on reordered partitions") {
-    val n = 600
     // This test ensures that randomSplit does not create overlapping splits even when the
     // underlying dataframe (such as the one below) doesn't guarantee a deterministic ordering of
     // rows in each partition.
     val data =
-      sparkContext.parallelize(1 to n, 2).mapPartitions(scala.util.Random.shuffle(_)).toDF("id")
-    val splits = data.randomSplit(Array[Double](1, 2, 3), seed = 1)
-    assert(splits.length == 3, "wrong number of splits")
+      sparkContext.parallelize(1 to 600, 2).mapPartitions(scala.util.Random.shuffle(_)).toDF("id")
+    val splits = data.randomSplit(Array[Double](2, 3), seed = 1)
 
-    assert(splits.reduce((a, b) => a.unionAll(b)).sort("id").collect().toList ==
-      data.sort($"id").collect().toList, "incomplete or wrong split")
+    assert(splits.length == 2, "wrong number of splits")
 
-    for (id <- splits.indices) {
-      assert(splits(id).intersect(splits((id + 1) % splits.length)).collect().isEmpty,
-        s"split $id overlaps with split ${(id + 1) % splits.length}")
-    }
+    // Verify that the splits span the entire dataset
+    assert(splits.flatMap(_.collect()).toSet == data.collect().toSet)
+
+    // Verify that the splits don't overlap
+    assert(splits(0).intersect(splits(1)).collect().isEmpty)
 
     // Verify that the results are deterministic across multiple runs
     val firstRun = splits.toSeq.map(_.collect().toSeq)
-    val secondRun = data.randomSplit(Array[Double](1, 2, 3), seed = 1).toSeq.map(_.collect().toSeq)
+    val secondRun = data.randomSplit(Array[Double](2, 3), seed = 1).toSeq.map(_.collect().toSeq)
     assert(firstRun == secondRun)
   }
 
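For context, a minimal standalone sketch (not part of the commit) of the randomSplit behavior this test exercises: the weights are relative, so Array(2, 3) yields roughly 40%/60% splits, and a fixed seed makes the splits reproducible. The sketch assumes a Spark 2.x+ SparkSession rather than the SharedSQLContext test harness used by the suite above; the object and app names are placeholders.

import org.apache.spark.sql.SparkSession

object RandomSplitSketch {
  def main(args: Array[String]): Unit = {
    // Local session for illustration only.
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("randomSplit-sketch")
      .getOrCreate()
    import spark.implicits._

    // Same shape of data as the test: 600 rows across 2 partitions.
    val df = spark.sparkContext.parallelize(1 to 600, 2).toDF("id")

    // Weights need not sum to 1; Spark normalizes them. The same seed
    // always produces the same, non-overlapping splits.
    val Array(smaller, larger) = df.randomSplit(Array(2.0, 3.0), seed = 1)
    println(s"smaller: ${smaller.count()} rows, larger: ${larger.count()} rows")

    spark.stop()
  }
}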