@@ -100,6 +100,34 @@ class PartitioningSuite extends FunSuite with SharedSparkContext with PrivateMet
100100 partitioner.getPartition(Row (100 ))
101101 }
102102
test("RangePartitioner should run only one job if data is roughly balanced") {
  // Build a cached RDD of (Double, Int) pairs across 20 partitions: partition i
  // contributes 5000 * i keys spread uniformly in [i, i + 1). Sizes grow only
  // linearly, so the data counts as "roughly balanced" for the sampler.
  val rdd = sc.makeRDD(0 until 20, 20).flatMap { i =>
    // Seed per partition so the test data is deterministic across runs.
    val random = new java.util.Random(i)
    Iterator.fill(5000 * i)((random.nextDouble() + i, i))
  }.cache()
  for (numPartitions <- Seq(10, 20, 40)) {
    val partitioner = new RangePartitioner(numPartitions, rdd)
    assert(partitioner.numPartitions === numPartitions)
    // Balanced input should let the partitioner determine bounds in a single
    // sampling job rather than launching a follow-up pass.
    assert(partitioner.singlePass === true)
    // Resulting partitions should be reasonably even: the largest must hold
    // fewer than twice as many keys as the smallest.
    val counts = rdd.keys.map(key => partitioner.getPartition(key)).countByValue().values
    assert(counts.max < 2.0 * counts.min)
  }
}
116+
test("RangePartitioner should work well on unbalanced data") {
  // Build a cached RDD whose partition sizes grow cubically (20 * i^3 pairs in
  // partition i), i.e. heavily skewed input for the range sampler.
  val rdd = sc.makeRDD(0 until 20, 20).flatMap { i =>
    // Seed per partition so the test data is deterministic across runs.
    val random = new java.util.Random(i)
    Iterator.fill(20 * i * i * i)((random.nextDouble() + i, i))
  }.cache()
  for (numPartitions <- Seq(2, 4, 8)) {
    val partitioner = new RangePartitioner(numPartitions, rdd)
    assert(partitioner.numPartitions === numPartitions)
    // Skewed input is expected to force a second sampling job, so the
    // single-pass shortcut must NOT have been taken.
    assert(partitioner.singlePass === false)
    // Even on skewed data the computed range bounds should yield partitions
    // where the largest holds fewer than twice as many keys as the smallest.
    val counts = rdd.keys.map(key => partitioner.getPartition(key)).countByValue().values
    assert(counts.max < 2.0 * counts.min)
  }
}
130+
103131 test(" HashPartitioner not equal to RangePartitioner" ) {
104132 val rdd = sc.parallelize(1 to 10 ).map(x => (x, x))
105133 val rangeP2 = new RangePartitioner (2 , rdd)
0 commit comments