
Commit f2d76a0

Merge branch 'master' into pyspark-inputformats

Conflicts:
	project/SparkBuild.scala

2 parents: 41856a5 + a18ea00

291 files changed: 8527 additions & 2983 deletions


LICENSE

Lines changed: 32 additions & 0 deletions
@@ -396,3 +396,35 @@ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
+
+
+========================================================================
+For sbt and sbt-launch-lib.bash in sbt/:
+========================================================================
+
+// Generated from http://www.opensource.org/licenses/bsd-license.php
+Copyright (c) 2011, Paul Phillips.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+  * Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+  * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+  * Neither the name of the author nor the names of its contributors may be
+    used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

NOTICE

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 Apache Spark
-Copyright 2013 The Apache Software Foundation.
+Copyright 2014 The Apache Software Foundation.
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).

README.md

Lines changed: 3 additions & 14 deletions
@@ -1,12 +1,12 @@
 # Apache Spark
 
-Lightning-Fast Cluster Computing - <http://spark.incubator.apache.org/>
+Lightning-Fast Cluster Computing - <http://spark.apache.org/>
 
 
 ## Online Documentation
 
 You can find the latest Spark documentation, including a programming
-guide, on the project webpage at <http://spark.incubator.apache.org/documentation.html>.
+guide, on the project webpage at <http://spark.apache.org/documentation.html>.
 This README file only contains basic setup instructions.
 
 
@@ -92,21 +92,10 @@ If your project is built with Maven, add this to your POM file's `<dependencies>
 
 ## Configuration
 
-Please refer to the [Configuration guide](http://spark.incubator.apache.org/docs/latest/configuration.html)
+Please refer to the [Configuration guide](http://spark.apache.org/docs/latest/configuration.html)
 in the online documentation for an overview on how to configure Spark.
 
 
-## Apache Incubator Notice
-
-Apache Spark is an effort undergoing incubation at The Apache Software
-Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of
-all newly accepted projects until a further review indicates that the
-infrastructure, communications, and decision making process have stabilized in
-a manner consistent with other successful ASF projects. While incubation status
-is not necessarily a reflection of the completeness or stability of the code,
-it does indicate that the project has yet to be fully endorsed by the ASF.
-
 ## Contributing to Spark
 
 Contributions via GitHub pull requests are gladly accepted from their original

assembly/pom.xml

Lines changed: 16 additions & 3 deletions
@@ -21,17 +21,20 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>1.0.0-incubating-SNAPSHOT</version>
+    <version>1.0.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-assembly_2.10</artifactId>
   <name>Spark Project Assembly</name>
-  <url>http://spark.incubator.apache.org/</url>
+  <url>http://spark.apache.org/</url>
+  <packaging>pom</packaging>
 
   <properties>
-    <spark.jar>${project.build.directory}/scala-${scala.binary.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</spark.jar>
+    <spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
+    <spark.jar.basename>${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
+    <spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
     <deb.pkg.name>spark</deb.pkg.name>
     <deb.install.path>/usr/share/spark</deb.install.path>
     <deb.user>root</deb.user>
@@ -155,6 +158,16 @@
       </dependency>
     </dependencies>
   </profile>
+  <profile>
+    <id>spark-ganglia-lgpl</id>
+    <dependencies>
+      <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-ganglia-lgpl_${scala.binary.version}</artifactId>
+        <version>${project.version}</version>
+      </dependency>
+    </dependencies>
+  </profile>
   <profile>
     <id>bigtop-dist</id>
     <!-- This profile uses the assembly plugin to create a special "dist" package for BigTop

assembly/src/main/assembly/assembly.xml

Lines changed: 11 additions & 0 deletions
@@ -55,6 +55,15 @@
       <include>**/*</include>
     </includes>
   </fileSet>
+  <fileSet>
+    <directory>
+      ${project.parent.basedir}/assembly/target/${spark.jar.dir}
+    </directory>
+    <outputDirectory>/</outputDirectory>
+    <includes>
+      <include>${spark.jar.basename}</include>
+    </includes>
+  </fileSet>
 </fileSets>
 
 <dependencySets>
@@ -75,6 +84,8 @@
     <excludes>
      <exclude>org.apache.hadoop:*:jar</exclude>
      <exclude>org.apache.spark:*:jar</exclude>
+      <exclude>org.apache.zookeeper:*:jar</exclude>
+      <exclude>org.apache.avro:*:jar</exclude>
    </excludes>
  </dependencySet>
</dependencySets>

bagel/pom.xml

Lines changed: 16 additions & 2 deletions
@@ -21,15 +21,29 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>1.0.0-incubating-SNAPSHOT</version>
+    <version>1.0.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-bagel_2.10</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Bagel</name>
-  <url>http://spark.incubator.apache.org/</url>
+  <url>http://spark.apache.org/</url>
+
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.avro</groupId>
+          <artifactId>avro</artifactId>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
 
   <dependencies>
     <dependency>

bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala

Lines changed: 7 additions & 7 deletions
@@ -27,7 +27,7 @@ object Bagel extends Logging {
 
   /**
    * Runs a Bagel program.
-   * @param sc [[org.apache.spark.SparkContext]] to use for the program.
+   * @param sc org.apache.spark.SparkContext to use for the program.
    * @param vertices vertices of the graph represented as an RDD of (Key, Vertex) pairs. Often the
    *                 Key will be the vertex id.
    * @param messages initial set of messages represented as an RDD of (Key, Message) pairs. Often
@@ -38,10 +38,10 @@ object Bagel extends Logging {
    * @param aggregator [[org.apache.spark.bagel.Aggregator]] performs a reduce across all vertices
    *                   after each superstep and provides the result to each vertex in the next
    *                   superstep.
-   * @param partitioner [[org.apache.spark.Partitioner]] partitions values by key
+   * @param partitioner org.apache.spark.Partitioner partitions values by key
    * @param numPartitions number of partitions across which to split the graph.
    *                      Default is the default parallelism of the SparkContext
-   * @param storageLevel [[org.apache.spark.storage.StorageLevel]] to use for caching of
+   * @param storageLevel org.apache.spark.storage.StorageLevel to use for caching of
    *                     intermediate RDDs in each superstep. Defaults to caching in memory.
    * @param compute function that takes a Vertex, optional set of (possibly combined) messages to
    *                the Vertex, optional Aggregator and the current superstep,
@@ -131,7 +131,7 @@ object Bagel extends Logging {
 
   /**
    * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]], default
-   * [[org.apache.spark.HashPartitioner]] and default storage level
+   * org.apache.spark.HashPartitioner and default storage level
    */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
       sc: SparkContext,
@@ -146,7 +146,7 @@ object Bagel extends Logging {
 
   /**
    * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] and the
-   * default [[org.apache.spark.HashPartitioner]]
+   * default org.apache.spark.HashPartitioner
    */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
       sc: SparkContext,
@@ -166,7 +166,7 @@ object Bagel extends Logging {
 
   /**
    * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]],
-   * default [[org.apache.spark.HashPartitioner]],
+   * default org.apache.spark.HashPartitioner,
    * [[org.apache.spark.bagel.DefaultCombiner]] and the default storage level
    */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest](
@@ -180,7 +180,7 @@ object Bagel extends Logging {
 
   /**
    * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]],
-   * the default [[org.apache.spark.HashPartitioner]]
+   * the default org.apache.spark.HashPartitioner
    * and [[org.apache.spark.bagel.DefaultCombiner]]
    */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest](

bin/spark-class

Lines changed: 28 additions & 20 deletions
@@ -40,34 +40,46 @@ if [ -z "$1" ]; then
   exit 1
 fi
 
-# If this is a standalone cluster daemon, reset SPARK_JAVA_OPTS and SPARK_MEM to reasonable
-# values for that; it doesn't need a lot
-if [ "$1" = "org.apache.spark.deploy.master.Master" -o "$1" = "org.apache.spark.deploy.worker.Worker" ]; then
-  SPARK_MEM=${SPARK_DAEMON_MEMORY:-512m}
-  SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true"
-  # Do not overwrite SPARK_JAVA_OPTS environment variable in this script
-  OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS"   # Empty by default
-else
-  OUR_JAVA_OPTS="$SPARK_JAVA_OPTS"
+if [ -n "$SPARK_MEM" ]; then
+  echo "Warning: SPARK_MEM is deprecated, please use a more specific config option"
+  echo "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)."
 fi
 
+# Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
+DEFAULT_MEM=${SPARK_MEM:-512m}
+
+SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true"
 
-# Add java opts for master, worker, executor. The opts maybe null
+# Add java opts and memory settings for master, worker, executors, and repl.
 case "$1" in
+  # Master and Worker use SPARK_DAEMON_JAVA_OPTS (and specific opts) + SPARK_DAEMON_MEMORY.
   'org.apache.spark.deploy.master.Master')
-    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_MASTER_OPTS"
+    OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS $SPARK_MASTER_OPTS"
+    OUR_JAVA_MEM=${SPARK_DAEMON_MEMORY:-$DEFAULT_MEM}
     ;;
   'org.apache.spark.deploy.worker.Worker')
-    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_WORKER_OPTS"
+    OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS $SPARK_WORKER_OPTS"
+    OUR_JAVA_MEM=${SPARK_DAEMON_MEMORY:-$DEFAULT_MEM}
     ;;
+
+  # Executors use SPARK_JAVA_OPTS + SPARK_EXECUTOR_MEMORY.
   'org.apache.spark.executor.CoarseGrainedExecutorBackend')
-    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
+    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
+    OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
     ;;
   'org.apache.spark.executor.MesosExecutorBackend')
-    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
+    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
+    OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
     ;;
+
+  # All drivers use SPARK_JAVA_OPTS + SPARK_DRIVER_MEMORY. The repl also uses SPARK_REPL_OPTS.
   'org.apache.spark.repl.Main')
-    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_REPL_OPTS"
+    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_REPL_OPTS"
+    OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
+    ;;
+  *)
+    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS"
+    OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
     ;;
 esac
 
@@ -83,14 +95,10 @@ else
   fi
 fi
 
-# Set SPARK_MEM if it isn't already set since we also use it for this process
-SPARK_MEM=${SPARK_MEM:-512m}
-export SPARK_MEM
-
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 JAVA_OPTS="$OUR_JAVA_OPTS"
 JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
-JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM"
+JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
   JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
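The per-role memory selection above leans entirely on the shell's `${VAR:-default}` expansion: each role reads its own memory variable and falls back to `DEFAULT_MEM`, which is itself `SPARK_MEM` or 512m. A minimal standalone sketch of that pattern (the variable values here are hypothetical, not part of the diff):

```shell
# Assume SPARK_MEM is unset for this demo, so DEFAULT_MEM becomes 512m.
unset SPARK_MEM
DEFAULT_MEM=${SPARK_MEM:-512m}

# When the role-specific variable is set, it wins over the default.
SPARK_EXECUTOR_MEMORY=2g
OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
echo "$OUR_JAVA_MEM"   # prints 2g

# When it is unset, the expansion falls back to DEFAULT_MEM.
unset SPARK_EXECUTOR_MEMORY
OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
echo "$OUR_JAVA_MEM"   # prints 512m
```

This is why exporting `SPARK_MEM` is no longer needed: the chosen value flows into `-Xms`/`-Xmx` through `OUR_JAVA_MEM` instead of a process-wide environment variable.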

bin/spark-class2.cmd

Lines changed: 35 additions & 12 deletions
@@ -34,22 +34,45 @@ if not "x%1"=="x" goto arg_given
 goto exit
 :arg_given
 
-set RUNNING_DAEMON=0
-if "%1"=="spark.deploy.master.Master" set RUNNING_DAEMON=1
-if "%1"=="spark.deploy.worker.Worker" set RUNNING_DAEMON=1
-if "x%SPARK_DAEMON_MEMORY%" == "x" set SPARK_DAEMON_MEMORY=512m
+if not "x%SPARK_MEM%"=="x" (
+  echo Warning: SPARK_MEM is deprecated, please use a more specific config option
+  echo e.g., spark.executor.memory or SPARK_DRIVER_MEMORY.
+)
+
+rem Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
+set OUR_JAVA_MEM=%SPARK_MEM%
+if "x%OUR_JAVA_MEM%"=="x" set OUR_JAVA_MEM=512m
+
 set SPARK_DAEMON_JAVA_OPTS=%SPARK_DAEMON_JAVA_OPTS% -Dspark.akka.logLifecycleEvents=true
-if "%RUNNING_DAEMON%"=="1" set SPARK_MEM=%SPARK_DAEMON_MEMORY%
-rem Do not overwrite SPARK_JAVA_OPTS environment variable in this script
-if "%RUNNING_DAEMON%"=="0" set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS%
-if "%RUNNING_DAEMON%"=="1" set OUR_JAVA_OPTS=%SPARK_DAEMON_JAVA_OPTS%
 
-rem Figure out how much memory to use per executor and set it as an environment
-rem variable so that our process sees it and can report it to Mesos
-if "x%SPARK_MEM%"=="x" set SPARK_MEM=512m
+rem Add java opts and memory settings for master, worker, executors, and repl.
+rem Master and Worker use SPARK_DAEMON_JAVA_OPTS (and specific opts) + SPARK_DAEMON_MEMORY.
+if "%1"=="org.apache.spark.deploy.master.Master" (
+  set OUR_JAVA_OPTS=%SPARK_DAEMON_JAVA_OPTS% %SPARK_MASTER_OPTS%
+  if not "x%SPARK_DAEMON_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_DAEMON_MEMORY%
+) else if "%1"=="org.apache.spark.deploy.worker.Worker" (
+  set OUR_JAVA_OPTS=%SPARK_DAEMON_JAVA_OPTS% %SPARK_WORKER_OPTS%
+  if not "x%SPARK_DAEMON_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_DAEMON_MEMORY%
+
+rem Executors use SPARK_JAVA_OPTS + SPARK_EXECUTOR_MEMORY.
+) else if "%1"=="org.apache.spark.executor.CoarseGrainedExecutorBackend" (
+  set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS% %SPARK_EXECUTOR_OPTS%
+  if not "x%SPARK_EXECUTOR_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_EXECUTOR_MEMORY%
+) else if "%1"=="org.apache.spark.executor.MesosExecutorBackend" (
+  set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS% %SPARK_EXECUTOR_OPTS%
+  if not "x%SPARK_EXECUTOR_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_EXECUTOR_MEMORY%
+
+rem All drivers use SPARK_JAVA_OPTS + SPARK_DRIVER_MEMORY. The repl also uses SPARK_REPL_OPTS.
+) else if "%1"=="org.apache.spark.repl.Main" (
+  set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS% %SPARK_REPL_OPTS%
+  if not "x%SPARK_DRIVER_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_DRIVER_MEMORY%
+) else (
+  set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS%
+  if not "x%SPARK_DRIVER_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_DRIVER_MEMORY%
+)
 
 rem Set JAVA_OPTS to be able to load native libraries and to set heap size
-set JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%SPARK_MEM% -Xmx%SPARK_MEM%
+set JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
 rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!
 
 rem Test whether the user has built Spark
