diff --git a/bin/pyspark b/bin/pyspark
index 39a20e2a24a3..c39eb62ce5a8 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -23,6 +23,8 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
+source $FWDIR/bin/utils.sh
+
 SCALA_VERSION=2.10
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
@@ -67,9 +69,10 @@ fi
 
 # We export Spark submit arguments as an environment variable because shell.py must run as a
 # PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
+gatherSparkSubmitOpts "$@"
 PYSPARK_SUBMIT_ARGS=""
 whitespace="[[:space:]]"
-for i in "$@"; do
+for i in "${SUBMISSION_OPTS[@]}"; do
   if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
   if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
   PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
@@ -90,7 +93,10 @@ fi
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
   echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
-  exec $FWDIR/bin/spark-submit "$@"
+  primary=$1
+  shift
+  gatherSparkSubmitOpts "$@"
+  exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
 else
   # Only use ipython if no command line arguments were provided [SPARK-1134]
   if [[ "$IPYTHON" = "1" ]]; then
diff --git a/bin/spark-shell b/bin/spark-shell
index 756c8179d12b..c30fd5cb8930 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -37,7 +37,10 @@ if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   exit 0
 fi
 
-function main(){
+source $FWDIR/bin/utils.sh
+gatherSparkSubmitOpts "$@"
+
+function main() {
   if $cygwin; then
     # Workaround for issue involving JLine and Cygwin
     # (see http://sourceforge.net/p/jline/bugs/40/).
@@ -46,11 +49,11 @@ function main(){
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
     export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
     stty icanon echo > /dev/null 2>&1
   else
     export SPARK_SUBMIT_OPTS
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
   fi
 }
 
diff --git a/bin/utils.sh b/bin/utils.sh
new file mode 100644
index 000000000000..9c56b99488e3
--- /dev/null
+++ b/bin/utils.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Gather all spark-submit options into SUBMISSION_OPTS
+function gatherSparkSubmitOpts() {
+  SUBMISSION_OPTS=()
+  APPLICATION_OPTS=()
+  while (($#)); do
+    case $1 in
+      --master | --deploy-mode | --class | --name | --jars | --py-files | --files)
+        ;&
+
+      --conf | --properties-file | --driver-memory | --driver-java-options)
+        ;&
+
+      --driver-library-path | --driver-class-path | --executor-memory | --driver-cores)
+        ;&
+
+      --total-executor-cores | --executor-cores | --queue | --num-executors | --archives)
+        if [[ $# -lt 2 ]]; then
+          usage
+          exit 1;
+        fi
+        SUBMISSION_OPTS+=("$1"); shift
+        SUBMISSION_OPTS+=("$1"); shift
+        ;;
+
+      --verbose | -v | --supervise)
+        SUBMISSION_OPTS+=("$1"); shift
+        ;;
+
+      *)
+        APPLICATION_OPTS+=("$1"); shift
+        ;;
+    esac
+  done
+
+  export SUBMISSION_OPTS
+  export APPLICATION_OPTS
+}
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 37386ab0d7d4..c7f7c1fe591b 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -39,7 +39,7 @@ def launch_gateway():
         submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS")
         submit_args = submit_args if submit_args is not None else ""
         submit_args = shlex.split(submit_args)
-        command = [os.path.join(SPARK_HOME, script), "pyspark-shell"] + submit_args
+        command = [os.path.join(SPARK_HOME, script)] + submit_args + ["pyspark-shell"]
         if not on_windows:
             # Don't send ctrl-c / SIGINT to the Java gateway:
             def preexec_func():
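
As a quick illustration of how the new gatherSparkSubmitOpts helper splits a mixed argument list (a hedged sketch, not part of the patch: it assumes bash 4+ for the ;& fall-through, the Spark repo root as the working directory, and made-up option values and application arguments):

    # Illustrative only: split recognized spark-submit flags from app arguments.
    source bin/utils.sh
    gatherSparkSubmitOpts --master "local[2]" --name demo myApp.py appArg1
    echo "${SUBMISSION_OPTS[@]}"     # --master local[2] --name demo
    echo "${APPLICATION_OPTS[@]}"    # myApp.py appArg1

Options that spark-submit recognizes land in SUBMISSION_OPTS (value-taking options consume the following argument as well); everything else falls through to APPLICATION_OPTS. This ordering is also why java_gateway.py now appends "pyspark-shell" after submit_args: spark-submit expects its options before the primary resource.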