
Commit e110d70

Merge branch 'master' into hll
Conflicts: project/MimaExcludes.scala
2 parents 354deb8 + b1feb60

56 files changed: +952 additions, -306 deletions


bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala

Lines changed: 0 additions & 2 deletions
@@ -38,8 +38,6 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
       sc.stop()
       sc = null
     }
-    // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
-    System.clearProperty("spark.driver.port")
   }
 
   test("halting by voting") {

core/src/main/scala/org/apache/spark/storage/BlockManager.scala

Lines changed: 20 additions & 2 deletions
@@ -1015,8 +1015,26 @@ private[spark] class BlockManager(
       bytes: ByteBuffer,
       serializer: Serializer = defaultSerializer): Iterator[Any] = {
     bytes.rewind()
-    val stream = wrapForCompression(blockId, new ByteBufferInputStream(bytes, true))
-    serializer.newInstance().deserializeStream(stream).asIterator
+
+    def getIterator = {
+      val stream = wrapForCompression(blockId, new ByteBufferInputStream(bytes, true))
+      serializer.newInstance().deserializeStream(stream).asIterator
+    }
+
+    if (blockId.isShuffle) {
+      // Reducer may need to read many local shuffle blocks and will wrap them into Iterators
+      // at the beginning. The wrapping will cost some memory (compression instance
+      // initialization, etc.). Reducer read shuffle blocks one by one so we could do the
+      // wrapping lazily to save memory.
+      class LazyProxyIterator(f: => Iterator[Any]) extends Iterator[Any] {
+        lazy val proxy = f
+        override def hasNext: Boolean = proxy.hasNext
+        override def next(): Any = proxy.next()
+      }
+      new LazyProxyIterator(getIterator)
+    } else {
+      getIterator
+    }
   }
 
   def stop()
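
The change above makes deserialization of shuffle blocks lazy: building the decompression and deserialization stream is deferred until the reducer actually pulls from the iterator, so blocks that are merely queued do not pay the setup cost up front. A minimal standalone sketch of the same proxy pattern (the names and the expensive factory below are illustrative, not the Spark API):

  // Defer building an expensive iterator until it is first consumed.
  class LazyProxyIterator[A](make: => Iterator[A]) extends Iterator[A] {
    private lazy val proxy = make                    // evaluated on first hasNext/next
    override def hasNext: Boolean = proxy.hasNext
    override def next(): A = proxy.next()
  }

  def expensiveIterator: Iterator[Int] = {
    println("building underlying iterator")          // shows when the work actually happens
    (1 to 3).iterator
  }

  val it = new LazyProxyIterator(expensiveIterator)  // nothing has been built yet
  println(it.hasNext)                                // the underlying iterator is built here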

core/src/main/scala/org/apache/spark/storage/StorageLevel.scala

Lines changed: 21 additions & 0 deletions
@@ -147,6 +147,27 @@ object StorageLevel {
   val MEMORY_AND_DISK_SER_2 = new StorageLevel(true, true, false, false, 2)
   val OFF_HEAP = new StorageLevel(false, false, true, false)
 
+  /**
+   * :: DeveloperApi ::
+   * Return the StorageLevel object with the specified name.
+   */
+  @DeveloperApi
+  def fromString(s: String): StorageLevel = s match {
+    case "NONE" => NONE
+    case "DISK_ONLY" => DISK_ONLY
+    case "DISK_ONLY_2" => DISK_ONLY_2
+    case "MEMORY_ONLY" => MEMORY_ONLY
+    case "MEMORY_ONLY_2" => MEMORY_ONLY_2
+    case "MEMORY_ONLY_SER" => MEMORY_ONLY_SER
+    case "MEMORY_ONLY_SER_2" => MEMORY_ONLY_SER_2
+    case "MEMORY_AND_DISK" => MEMORY_AND_DISK
+    case "MEMORY_AND_DISK_2" => MEMORY_AND_DISK_2
+    case "MEMORY_AND_DISK_SER" => MEMORY_AND_DISK_SER
+    case "MEMORY_AND_DISK_SER_2" => MEMORY_AND_DISK_SER_2
+    case "OFF_HEAP" => OFF_HEAP
+    case _ => throw new IllegalArgumentException("Invalid StorageLevel: " + s)
+  }
+
   /**
    * :: DeveloperApi ::
    * Create a new StorageLevel object without setting useOffHeap.
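
The new fromString helper maps a level name back to its singleton instance, which is convenient when the desired storage level arrives as configuration text. A small usage sketch (the config key here is made up for illustration):

  import org.apache.spark.storage.StorageLevel

  // Resolve a storage level from a string, e.g. read from a hypothetical config entry.
  val levelName = sys.props.getOrElse("example.storage.level", "MEMORY_AND_DISK_SER")
  val level = StorageLevel.fromString(levelName)  // throws IllegalArgumentException on unknown names
  // rdd.persist(level)                           // then persist an RDD at that level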

core/src/test/java/org/apache/spark/JavaAPISuite.java

Lines changed: 0 additions & 3 deletions
@@ -68,9 +68,6 @@ public void setUp() {
   public void tearDown() {
     sc.stop();
     sc = null;
-    // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
-    System.clearProperty("spark.driver.port");
-    Utils.deleteRecursively(tempDir);
   }
 
   static class ReverseIntComparator implements Comparator<Integer>, Serializable {

core/src/test/scala/org/apache/spark/AkkaUtilsSuite.scala

Lines changed: 0 additions & 4 deletions
@@ -39,7 +39,6 @@ class AkkaUtilsSuite extends FunSuite with LocalSparkContext {
     val hostname = "localhost"
     val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, 0,
       conf = conf, securityManager = securityManager)
-    System.setProperty("spark.driver.port", boundPort.toString) // Will be cleared by LocalSparkContext
     System.setProperty("spark.hostPort", hostname + ":" + boundPort)
     assert(securityManager.isAuthenticationEnabled() === true)
 
@@ -77,7 +76,6 @@ class AkkaUtilsSuite extends FunSuite with LocalSparkContext {
     val hostname = "localhost"
     val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, 0,
       conf = conf, securityManager = securityManager)
-    System.setProperty("spark.driver.port", boundPort.toString) // Will be cleared by LocalSparkContext
     System.setProperty("spark.hostPort", hostname + ":" + boundPort)
 
     assert(securityManager.isAuthenticationEnabled() === false)
@@ -129,7 +127,6 @@ class AkkaUtilsSuite extends FunSuite with LocalSparkContext {
     val hostname = "localhost"
     val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, 0,
       conf = conf, securityManager = securityManager)
-    System.setProperty("spark.driver.port", boundPort.toString) // Will be cleared by LocalSparkContext
     System.setProperty("spark.hostPort", hostname + ":" + boundPort)
 
     assert(securityManager.isAuthenticationEnabled() === true)
@@ -182,7 +179,6 @@ class AkkaUtilsSuite extends FunSuite with LocalSparkContext {
     val hostname = "localhost"
     val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, 0,
       conf = conf, securityManager = securityManager)
-    System.setProperty("spark.driver.port", boundPort.toString) // Will be cleared by LocalSparkContext
     System.setProperty("spark.hostPort", hostname + ":" + boundPort)
 
     assert(securityManager.isAuthenticationEnabled() === true)
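
Every removed line here hand-set spark.driver.port and leaned on the trailing comment's promise that LocalSparkContext would clear it again. A sketch of how that kind of per-test cleanup can live in one shared ScalaTest trait instead of each suite (the trait name and property list are illustrative, not Spark's actual LocalSparkContext):

  import org.scalatest.{BeforeAndAfterEach, Suite}

  // Illustrative trait: clear JVM-wide properties after every test so suites
  // cannot leak state into each other.
  trait ClearsDriverPort extends BeforeAndAfterEach { self: Suite =>
    override def afterEach(): Unit = {
      try {
        System.clearProperty("spark.driver.port")
        System.clearProperty("spark.hostPort")
      } finally {
        super.afterEach()
      }
    }
  }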

core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala

Lines changed: 0 additions & 3 deletions
@@ -124,9 +124,6 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext {
     val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, 0, conf = conf,
       securityManager = new SecurityManager(conf))
 
-    // Will be cleared by LocalSparkContext
-    System.setProperty("spark.driver.port", boundPort.toString)
-
     val masterTracker = new MapOutputTrackerMaster(conf)
     masterTracker.trackerActor = actorSystem.actorOf(
       Props(new MapOutputTrackerMasterActor(masterTracker, conf)), "MapOutputTracker")

core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala

Lines changed: 0 additions & 2 deletions
@@ -78,8 +78,6 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT
   }
 
   after {
-    System.clearProperty("spark.driver.port")
-
     if (store != null) {
       store.stop()
       store = null

docs/spark-debugger.md

Lines changed: 0 additions & 121 deletions
This file was deleted.

examples/src/main/scala/org/apache/spark/examples/graphx/SynthBenchmark.scala

Lines changed: 128 additions & 0 deletions

@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.graphx
+
+import org.apache.spark.SparkContext._
+import org.apache.spark.graphx.PartitionStrategy
+import org.apache.spark.{SparkContext, SparkConf}
+import org.apache.spark.graphx.util.GraphGenerators
+import java.io.{PrintWriter, FileOutputStream}
+
+/**
+ * The SynthBenchmark application can be used to run various GraphX algorithms on
+ * synthetic log-normal graphs.  The intent of this code is to enable users to
+ * profile the GraphX system without access to large graph datasets.
+ */
+object SynthBenchmark {
+
+  /**
+   * To run this program use the following:
+   *
+   * MASTER=spark://foobar bin/run-example graphx.SynthBenchmark -app=pagerank
+   *
+   * Options:
+   *   -app "pagerank" or "cc" for pagerank or connected components. (Default: pagerank)
+   *   -niters the number of iterations of pagerank to use (Default: 10)
+   *   -numVertices the number of vertices in the graph (Default: 1000000)
+   *   -numEPart the number of edge partitions in the graph (Default: number of cores)
+   *   -partStrategy the graph partitioning strategy to use
+   *   -mu the mean parameter for the log-normal graph (Default: 4.0)
+   *   -sigma the stdev parameter for the log-normal graph (Default: 1.3)
+   *   -degFile the local file to save the degree information (Default: Empty)
+   */
+  def main(args: Array[String]) {
+    val options = args.map {
+      arg =>
+        arg.dropWhile(_ == '-').split('=') match {
+          case Array(opt, v) => (opt -> v)
+          case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
+        }
+    }
+
+    var app = "pagerank"
+    var niter = 10
+    var numVertices = 100000
+    var numEPart: Option[Int] = None
+    var partitionStrategy: Option[PartitionStrategy] = None
+    var mu: Double = 4.0
+    var sigma: Double = 1.3
+    var degFile: String = ""
+
+    options.foreach {
+      case ("app", v) => app = v
+      case ("niter", v) => niter = v.toInt
+      case ("nverts", v) => numVertices = v.toInt
+      case ("numEPart", v) => numEPart = Some(v.toInt)
+      case ("partStrategy", v) => partitionStrategy = Some(PartitionStrategy.fromString(v))
+      case ("mu", v) => mu = v.toDouble
+      case ("sigma", v) => sigma = v.toDouble
+      case ("degFile", v) => degFile = v
+      case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
+    }
+
+    val conf = new SparkConf()
+      .setAppName(s"GraphX Synth Benchmark (nverts = $numVertices, app = $app)")
+      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
+      .set("spark.kryo.registrator", "org.apache.spark.graphx.GraphKryoRegistrator")
+
+    val sc = new SparkContext(conf)
+
+    // Create the graph
+    println(s"Creating graph...")
+    val unpartitionedGraph = GraphGenerators.logNormalGraph(sc, numVertices,
+      numEPart.getOrElse(sc.defaultParallelism), mu, sigma)
+    // Repartition the graph
+    val graph = partitionStrategy.foldLeft(unpartitionedGraph)(_.partitionBy(_)).cache()
+
+    var startTime = System.currentTimeMillis()
+    val numEdges = graph.edges.count()
+    println(s"Done creating graph. Num Vertices = $numVertices, Num Edges = $numEdges")
+    val loadTime = System.currentTimeMillis() - startTime
+
+    // Collect the degree distribution (if desired)
+    if (!degFile.isEmpty) {
+      val fos = new FileOutputStream(degFile)
+      val pos = new PrintWriter(fos)
+      val hist = graph.vertices.leftJoin(graph.degrees)((id, _, optDeg) => optDeg.getOrElse(0))
+        .map(p => p._2).countByValue()
+      hist.foreach {
+        case (deg, count) => pos.println(s"$deg \t $count")
+      }
+    }
+
+    // Run PageRank
+    startTime = System.currentTimeMillis()
+    if (app == "pagerank") {
+      println("Running PageRank")
+      val totalPR = graph.staticPageRank(niter).vertices.map(_._2).sum()
+      println(s"Total PageRank = $totalPR")
+    } else if (app == "cc") {
+      println("Running Connected Components")
+      val numComponents = graph.connectedComponents.vertices.map(_._2).distinct()
+      println(s"Number of components = $numComponents")
+    }
+    val runTime = System.currentTimeMillis() - startTime
+
+    println(s"Num Vertices = $numVertices")
+    println(s"Num Edges = $numEdges")
+    println(s"Creation time = ${loadTime/1000.0} seconds")
+    println(s"Run time = ${runTime/1000.0} seconds")
+
+    sc.stop()
+  }
+}
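
A sample invocation, following the form shown in the scaladoc above (the master URL is a placeholder; note that the parser in options.foreach matches the keys app, niter, nverts, numEPart, partStrategy, mu, sigma and degFile, so the -niters and -numVertices spellings listed in the scaladoc would be rejected as invalid options):

  MASTER=spark://host:7077 bin/run-example graphx.SynthBenchmark -app=pagerank -niter=20 -nverts=2000000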

examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ object RDDRelation {
     sql("SELECT * FROM records").collect().foreach(println)
 
     // Aggregation queries are also supported.
-    val count = sql("SELECT COUNT(*) FROM records").collect().head.getInt(0)
+    val count = sql("SELECT COUNT(*) FROM records").collect().head.getLong(0)
     println(s"COUNT(*): $count")
 
     // The results of SQL queries are themselves RDDs and support all normal RDD functions. The
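
The one-line fix reflects that Spark SQL returns COUNT(*) as a 64-bit value, so reading the column with getInt fails at runtime once the row is materialized. A minimal illustration, reusing the records table and sql context from the surrounding example:

  // COUNT(*) comes back as a Long, so read it with getLong rather than getInt.
  val row = sql("SELECT COUNT(*) FROM records").collect().head
  val count: Long = row.getLong(0)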
