From 32fd9e1376eef2b90e7fedff0f74b0bab183bf7c Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Thu, 25 Jun 2015 14:33:15 +0900 Subject: [PATCH 01/22] [ZEPPELIN-18] Running pyspark without deploying python libraries to every yarn node - rebasing --- spark/pom.xml | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/spark/pom.xml b/spark/pom.xml index 782670e256f..82dc6d140f7 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -48,6 +48,8 @@ org.spark-project.akka 2.3.4-spark + + http://www.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz @@ -912,6 +914,51 @@ + + + com.googlecode.maven-download-plugin + download-maven-plugin + 1.2.1 + + + download-pyspark-files + prepare-package + + wget + + + ${spark.download.url} + true + ${project.build.directory}/spark-dist + + + + + + org.apache.maven.plugins + maven-antrun-plugin + 1.7 + + + download-and-zip-pyspark-files + package + + run + + + + + + + + + + + + org.scala-tools From 1b192f60e41901d639e6c22c78932a253751a756 Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Sun, 21 Jun 2015 12:27:18 +0900 Subject: [PATCH 02/22] [ZEPPELIN-18] Remove setting SPARK_HOME for PySpark - Removed redundant dependency setting --- spark/pom.xml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/spark/pom.xml b/spark/pom.xml index 82dc6d140f7..22aa7ab0f32 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -475,13 +475,6 @@ - - - net.sf.py4j - py4j - 0.8.2.1 - - org.apache.commons commons-exec From 0ddb4366b909f89d4f0f59fcc324ae200d4b62de Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Wed, 24 Jun 2015 15:08:12 +0900 Subject: [PATCH 03/22] [ZEPPELIN-18] Running pyspark without deploying python libraries to every yarn node - Followed spark's way to support pyspark - https://issues.apache.org/jira/browse/SPARK-6869 - https://github.com/apache/spark/pull/5580 - https://github.com/apache/spark/pull/5478/files --- bin/interpreter.sh | 10 
++++++ spark/pom.xml | 33 ++++++++++++++----- .../zeppelin/spark/PySparkInterpreter.java | 12 ------- .../zeppelin/spark/SparkInterpreter.java | 19 ++++++++--- 4 files changed, 49 insertions(+), 25 deletions(-) diff --git a/bin/interpreter.sh b/bin/interpreter.sh index c214c3081d0..524e784b044 100755 --- a/bin/interpreter.sh +++ b/bin/interpreter.sh @@ -73,6 +73,16 @@ if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then $(mkdir -p "${ZEPPELIN_LOG_DIR}") fi +if [[ x"" == x${PYTHONPATH} ]]; then + export PYTHONPATH="${ZEPPELIN_HOME}/python/lib/pyspark.zip:${ZEPPELIN_HOME}/python/lib/py4j-0.8.2.1-src.zip" +else + export PYTHONPATH="$PYTHONPATH${ZEPPELIN_HOME}/lib/pyspark.zip:${ZEPPELIN_HOME}/python/lib/py4j-0.8.2.1-src.zip" +fi + +if [[ x"" == x${SPARK_HOME} ]]; then + export SPARK_HOME=${ZEPPELIN_HOME} +fi + ${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} -cp ${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} & pid=$! diff --git a/spark/pom.xml b/spark/pom.xml index 22aa7ab0f32..59ee5e806f5 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -915,7 +915,7 @@ download-pyspark-files - prepare-package + validate wget @@ -927,6 +927,20 @@ + + maven-clean-plugin + + + + ${basedir}/../python/build + + + ${project.build.direcoty}/spark-dist + + + true + + org.apache.maven.plugins maven-antrun-plugin @@ -934,18 +948,21 @@ download-and-zip-pyspark-files - package + generate-resources run - - - + + + + + + diff --git a/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java index 092b077359c..852dd335183 100644 --- a/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java +++ b/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java @@ -159,18 +159,6 @@ public void open() { try { Map env = EnvironmentUtils.getProcEnvironment(); - String pythonPath = (String) env.get("PYTHONPATH"); - if (pythonPath == null) { - pythonPath = ""; - } else { - pythonPath += ":"; - } - - pythonPath += getSparkHome() + 
"/python/lib/py4j-0.8.2.1-src.zip:" - + getSparkHome() + "/python"; - - env.put("PYTHONPATH", pythonPath); - executor.execute(cmd, env, this); pythonscriptRunning = true; } catch (IOException e) { diff --git a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java index ab3609ab422..57703b0eb3f 100644 --- a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java +++ b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java @@ -26,12 +26,9 @@ import java.lang.reflect.Method; import java.net.URL; import java.net.URLClassLoader; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; +import java.util.*; +import com.google.common.base.Joiner; import org.apache.spark.HttpServer; import org.apache.spark.SparkConf; import org.apache.spark.SparkContext; @@ -272,6 +269,18 @@ public SparkContext createSparkContext() { conf.set(key, val); } } + + //TODO(jongyoul): Move these codes into PySparkInterpreter.java + String zeppelinHome = getSystemDefault("ZEPPELIN_HOME", "zeppelin.home", "../"); + File zeppelinPythonLibPath = new File(zeppelinHome, "python/lib"); + String[] pythonLibs = new String[] {"pyspark.zip", "py4j-0.8.2.1-src.zip"}; + ArrayList pythonLibUris = new ArrayList<>(); + for (String lib: pythonLibs) { + pythonLibUris.add(new File(zeppelinPythonLibPath, lib).toURI().toString()); + } + conf.set("spark.yarn.dist.files", Joiner.on(",").join(pythonLibUris)); + conf.set("spark.files", conf.get("spark.yarn.dist.files")); + conf.set("spark.submit.pyArchives", Joiner.on(":").join(pythonLibs)); SparkContext sparkContext = new SparkContext(conf); return sparkContext; From 71e2a92b692583dd6d19bd676cb332946e01f00c Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Wed, 24 Jun 2015 15:29:01 +0900 Subject: [PATCH 04/22] [ZEPPELIN-18] Running pyspark without deploying python libraries to every yarn 
node - Removed verbose setting --- spark/pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/spark/pom.xml b/spark/pom.xml index 59ee5e806f5..a8a6780877c 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -938,7 +938,6 @@ ${project.build.direcoty}/spark-dist - true From b05ae6e23b08cce1d38869087ee5689f6a3d4fe4 Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Wed, 24 Jun 2015 19:32:29 +0900 Subject: [PATCH 05/22] [ZEPPELIN-18] Remove setting SPARK_HOME for PySpark - Excludes python/** from apache-rat --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index 8b4762ed898..8d0d7cd8c65 100644 --- a/pom.xml +++ b/pom.xml @@ -430,6 +430,7 @@ Roadmap.md conf/interpreter.json spark-*-bin*/** + python/** From 0a2d90eb4f1e17a2d58976b98b62839533ba2622 Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Fri, 3 Jul 2015 12:39:03 +0900 Subject: [PATCH 06/22] rebased --- .gitignore | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 1358a425246..9ed7d818dcd 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,6 @@ zeppelin-web/bower_components **nbproject/ **node/ - # project level /logs/ /run/ @@ -36,7 +35,6 @@ zeppelin-web/bower_components /warehouse/ /notebook/ /local-repo/ - **/sessions/ **/data/ **/build/ @@ -77,3 +75,4 @@ auto-save-list tramp .\#* *.swp +**/dependency-reduced-pom.xml From 64b819582faf897e3d3454593d5c5845fa34c213 Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Thu, 25 Jun 2015 14:35:04 +0900 Subject: [PATCH 07/22] [ZEPPELIN-18] Running pyspark without deploying python libraries to every yarn node - rebasing --- spark/pom.xml | 132 ++++++++++-------- .../src/assemble/distribution.xml | 3 + 2 files changed, 74 insertions(+), 61 deletions(-) diff --git a/spark/pom.xml b/spark/pom.xml index a8a6780877c..e4fc84dcacd 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -726,6 +726,77 @@ + + yarn-pyspark + + http://www.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz 
+ + + + + + com.googlecode.maven-download-plugin + download-maven-plugin + 1.2.1 + + + download-pyspark-files + validate + + wget + + + ${spark.download.url} + true + ${project.build.directory}/spark-dist + + + + + + maven-clean-plugin + + + + ${basedir}/../python/build + + + ${project.build.direcoty}/spark-dist + + + + + + org.apache.maven.plugins + maven-antrun-plugin + 1.7 + + + download-and-zip-pyspark-files + generate-resources + + run + + + + + + + + + + + + + + + + + + hadoop-provided @@ -907,67 +978,6 @@ - - - com.googlecode.maven-download-plugin - download-maven-plugin - 1.2.1 - - - download-pyspark-files - validate - - wget - - - ${spark.download.url} - true - ${project.build.directory}/spark-dist - - - - - - maven-clean-plugin - - - - ${basedir}/../python/build - - - ${project.build.direcoty}/spark-dist - - - - - - org.apache.maven.plugins - maven-antrun-plugin - 1.7 - - - download-and-zip-pyspark-files - generate-resources - - run - - - - - - - - - - - - - - - org.scala-tools diff --git a/zeppelin-distribution/src/assemble/distribution.xml b/zeppelin-distribution/src/assemble/distribution.xml index dae34c409c2..3e7827628cd 100644 --- a/zeppelin-distribution/src/assemble/distribution.xml +++ b/zeppelin-distribution/src/assemble/distribution.xml @@ -73,6 +73,9 @@ ../notebook + + ../python +
From 94bdf30e550cb71315a36c900d3101ba97c5db82 Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Thu, 25 Jun 2015 01:33:14 +0900 Subject: [PATCH 09/22] [ZEPPELIN-18] Running pyspark without deploying python libraries to every yarn node - Fixed checkstyle --- .../org/apache/zeppelin/spark/SparkInterpreter.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java index 7d2359063bb..bf229cadecc 100644 --- a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java +++ b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java @@ -269,20 +269,20 @@ public SparkContext createSparkContext() { conf.set(key, val); } } - + //TODO(jongyoul): Move these codes into PySparkInterpreter.java String zeppelinHome = getSystemDefault("ZEPPELIN_HOME", "zeppelin.home", "../"); File zeppelinPythonLibPath = new File(zeppelinHome, "python/lib"); - String[] pythonLibs = new String[] {"pyspark.zip", "py4j-0.8.2.1-src.zip"}; - ArrayList pythonLibUris = new ArrayList<>(); - for (String lib: pythonLibs) { + String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.8.2.1-src.zip"}; + ArrayList pythonLibUris = new ArrayList<>(); + for (String lib : pythonLibs) { File libFile = new File(zeppelinPythonLibPath, lib); - if(libFile.exists()) { + if (libFile.exists()) { pythonLibUris.add(libFile.toURI().toString()); } } pythonLibUris.trimToSize(); - if(pythonLibs.length == pythonLibUris.size()) { + if (pythonLibs.length == pythonLibUris.size()) { conf.set("spark.yarn.dist.files", Joiner.on(",").join(pythonLibUris)); conf.set("spark.files", conf.get("spark.yarn.dist.files")); conf.set("spark.submit.pyArchives", Joiner.on(":").join(pythonLibs)); From ad610fb9bcec77789fb024c1a6eca4e09c47827b Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Fri, 3 Jul 2015 12:41:11 +0900 Subject: [PATCH 10/22] rebased --- 
zeppelin-web/src/app/home/home.css | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zeppelin-web/src/app/home/home.css b/zeppelin-web/src/app/home/home.css index 584e896d77d..57b0b59af44 100644 --- a/zeppelin-web/src/app/home/home.css +++ b/zeppelin-web/src/app/home/home.css @@ -11,9 +11,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/* @import url(//fonts.googleapis.com/css?family=Patua+One); +@import url(//fonts.googleapis.com/css?family=Patua+One); @import url(//fonts.googleapis.com/css?family=Roboto); -@import url(//fonts.googleapis.com/css?family=Source+Code+Pro); */ +@import url(//fonts.googleapis.com/css?family=Source+Code+Pro); body { padding-top: 60px; From 8a7bf47d8fa7e97ab12f951fa86a31eeb5fad9f4 Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Thu, 25 Jun 2015 14:37:34 +0900 Subject: [PATCH 11/22] [ZEPPELIN-18] Running pyspark without deploying python libraries to every yarn node - rebasing --- .travis.yml | 24 ++++++++++++------------ spark/pom.xml | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4031e781edd..12994409433 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,34 +22,34 @@ before_install: - "sh -e /etc/init.d/xvfb start" install: - - mvn package -DskipTests -Phadoop-2.3 -B + - mvn package -DskipTests -Phadoop-2.3 -Ppyspark -B before_script: - script: # spark 1.4 - - mvn package -Pbuild-distr -Phadoop-2.3 -B + - mvn package -Pbuild-distr -Phadoop-2.3 -Ppyspark -B - ./testing/startSparkCluster.sh 1.4.0 2.3 - - SPARK_HOME=./spark-1.4.1-bin-hadoop2.3 mvn verify -Pusing-packaged-distr -Phadoop-2.3 -B + - SPARK_HOME=./spark-1.4.1-bin-hadoop2.3 mvn verify -Pusing-packaged-distr -Phadoop-2.3 -Ppyspark -B - ./testing/stopSparkCluster.sh 1.4.0 2.3 # spark 1.3 - - mvn clean package -DskipTests -Pspark-1.3 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark' - - mvn package -Pbuild-distr -Pspark-1.3 -Phadoop-2.3 -B + - 
mvn clean package -DskipTests -Pspark-1.3 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark' + - mvn package -Pbuild-distr -Pspark-1.3 -Phadoop-2.3 -Ppyspark -B - ./testing/startSparkCluster.sh 1.3.1 2.3 - - SPARK_HOME=./spark-1.3.1-bin-hadoop2.3 mvn verify -Pspark-1.3 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark' + - SPARK_HOME=./spark-1.3.1-bin-hadoop2.3 mvn verify -Pspark-1.3 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark' - ./testing/stopSparkCluster.sh 1.3.1 2.3 # spark 1.2 - - mvn clean package -DskipTests -Pspark-1.2 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark' - - mvn package -Pbuild-distr -Pspark-1.2 -Phadoop-2.3 -B + - mvn clean package -DskipTests -Pspark-1.2 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark' + - mvn package -Pbuild-distr -Pspark-1.2 -Phadoop-2.3 -Ppyspark -B - ./testing/startSparkCluster.sh 1.2.1 2.3 - - SPARK_HOME=./spark-1.2.1-bin-hadoop2.3 mvn verify -Pspark-1.2 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark' + - SPARK_HOME=./spark-1.2.1-bin-hadoop2.3 mvn verify -Pspark-1.2 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark' - ./testing/stopSparkCluster.sh 1.2.1 2.3 # spark 1.1 - - mvn clean package -DskipTests -Pspark-1.1 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark' - - mvn package -Pbuild-distr -Pspark-1.1 -Phadoop-2.3 -B + - mvn clean package -DskipTests -Pspark-1.1 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark' + - mvn package -Pbuild-distr -Pspark-1.1 -Phadoop-2.3 -Ppyspark -B - ./testing/startSparkCluster.sh 1.1.1 2.3 - - SPARK_HOME=./spark-1.1.1-bin-hadoop2.3 mvn verify -Pspark-1.1 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark' + - SPARK_HOME=./spark-1.1.1-bin-hadoop2.3 mvn verify -Pspark-1.1 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark' - ./testing/stopSparkCluster.sh 1.1.1 2.3 after_failure: diff --git a/spark/pom.xml b/spark/pom.xml index e4fc84dcacd..71bd4e40175 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -727,7 +727,7 @@ - yarn-pyspark + 
pyspark http://www.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz From 682986e6a7e0d399ccbc619a3445041b2c512238 Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Fri, 3 Jul 2015 12:41:39 +0900 Subject: [PATCH 12/22] rebased --- zeppelin-web/src/app/home/home.css | 12 ++++++++---- zeppelin-web/src/index.html | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/zeppelin-web/src/app/home/home.css b/zeppelin-web/src/app/home/home.css index 57b0b59af44..e09ed24c044 100644 --- a/zeppelin-web/src/app/home/home.css +++ b/zeppelin-web/src/app/home/home.css @@ -184,7 +184,7 @@ a.navbar-brand:hover { margin-bottom: 20px; } -.box, +.box, .well { background-color: #ffffff; border-color: #e5e5e5; @@ -236,11 +236,15 @@ h6.box-heading{ } .zeppelin2 { +<<<<<<< HEAD:zeppelin-web/src/app/home/home.css <<<<<<< HEAD:zeppelin-web/src/app/home/home.css background-image: url('/assets/images/zepLogo.png'); ======= background-image: url('../images/zepLogo.png'); >>>>>>> [ZEPPELIN-18] Remove setting SPARK_HOME for PySpark:zeppelin-web/app/styles/main.css +======= + background-image: url('../images/zepLogo.png'); +>>>>>>> Reset unwanted changes:zeppelin-web/app/styles/main.css background-repeat: no-repeat; background-position: right; background-position-y: 12px; @@ -281,9 +285,9 @@ kbd { } -/* -temporary fix for bootstrap issue (https://github.com/twbs/bootstrap/issues/5865) -This part should be removed when new version of bootstrap handles this issue. +/* +temporary fix for bootstrap issue (https://github.com/twbs/bootstrap/issues/5865) +This part should be removed when new version of bootstrap handles this issue. */ .btn-group > .tooltip + .btn, .btn-group > .popover + .btn { diff --git a/zeppelin-web/src/index.html b/zeppelin-web/src/index.html index 6a6836a52aa..120c6de163e 100644 --- a/zeppelin-web/src/index.html +++ b/zeppelin-web/src/index.html @@ -85,7 +85,7 @@ Interpreter -