diff --git a/.gitignore b/.gitignore index 39d17e1793f77..e6a80bef263e7 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ build/*.jar build/apache-maven* build/scala* build/zinc* +build/venv cache checkpoint conf/*.cmd @@ -47,6 +48,7 @@ docs/_site docs/api lib_managed/ lint-r-report.log +dev/*-report.txt log/ logs/ out/ diff --git a/dev/lint-python b/dev/lint-python index 63487043a50b6..7348dcb6ebb3f 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -17,6 +17,20 @@ # limitations under the License. # + +# expand alias for making virtualenv happy +shopt -s expand_aliases + +if [[ ! -z $CONDA_PREFIX ]]; then + echo "You are inside a Anaconda environment. Leaving it" + source deactivate +fi + +# nice trap to properly handle Ctrl+c +trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM EXIT + +VIRTUAL_ENV_DIR="build/venv" + SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")" PATHS_TO_CHECK="./python/pyspark/ ./examples/src/main/python/ ./dev/sparktestsupport" @@ -26,30 +40,47 @@ PYLINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/pylint-report.txt" PYLINT_INSTALL_INFO="$SPARK_ROOT_DIR/dev/pylint-info.txt" SPHINXBUILD=${SPHINXBUILD:=sphinx-build} SPHINX_REPORT_PATH="$SPARK_ROOT_DIR/dev/sphinx-report.txt" +VIRTUALENV_EXEC="virtualenv" + +# It is a good practice to upgrade pip to the latest version, however, sometimes it might break +# something. Let freeze this version here. We cannot put it into requirements.txt since the +# execution of this requirement files might depends on the pip version. +PIP_VERSION="8.1.2" cd "$SPARK_ROOT_DIR" +DEBUG=false +PIP_ARG="-q" +VIRTUALENV_ARG="-q" +SPHINX_ARGS="-q" +if [[ $1 == "--debug" || $1 == "-d" ]]; then + DEBUG=true + PIP_ARG="" + SPHINX_ARGS="" + VIRTUALENV_ARG="" +fi + + +if [[ ! -z $VIRTUAL_ENV ]]; then + echo "We already are inside a virtual environment: $VIRTUAL_ENV. Using it" +else + echo "Not inside a virtual environment. Jumping into one in: $VIRTUAL_ENV_DIR" + $VIRTUALENV_EXEC $VIRTUALENV_ARG $VIRTUAL_ENV_DIR || exit 1 + source $VIRTUAL_ENV_DIR/bin/activate +fi +if [[ $DEBUG==true ]]; then + echo "Python: $(python --version)" +fi +echo "To activate this environment, use: 'source $SPARK_ROOT_DIR/$VIRTUAL_ENV_DIR/bin/activate'" + +pip install $PIP_ARG --upgrade "pip==$PIP_VERSION" +echo "Installing developer dependencies (from dev/requirements.txt)..." +pip install $PIP_ARG --upgrade -r dev/requirements.txt + # compileall: https://docs.python.org/2/library/compileall.html python -B -m compileall -q -l $PATHS_TO_CHECK > "$PEP8_REPORT_PATH" compile_status="${PIPESTATUS[0]}" -# Get pep8 at runtime so that we don't rely on it being installed on the build server. -#+ See: https://github.com/apache/spark/pull/1744#issuecomment-50982162 -#+ TODOs: -#+ - Download pep8 from PyPI. It's more "official". -PEP8_VERSION="1.7.0" -PEP8_SCRIPT_PATH="$SPARK_ROOT_DIR/dev/pep8-$PEP8_VERSION.py" -PEP8_SCRIPT_REMOTE_PATH="https://raw.githubusercontent.com/jcrocholl/pep8/$PEP8_VERSION/pep8.py" - -if [ ! -e "$PEP8_SCRIPT_PATH" ]; then - curl --silent -o "$PEP8_SCRIPT_PATH" "$PEP8_SCRIPT_REMOTE_PATH" - curl_status="$?" - - if [ "$curl_status" -ne 0 ]; then - echo "Failed to download pep8.py from \"$PEP8_SCRIPT_REMOTE_PATH\"." - exit "$curl_status" - fi -fi # Easy install pylint in /dev/pylint. To easy_install into a directory, the PYTHONPATH should # be set to the directory. @@ -64,7 +95,8 @@ export "PATH=$PYTHONPATH:$PATH" #+ first, but we do so so that the check status can #+ be output before the report, like with the #+ scalastyle and RAT checks. -python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 --config=dev/tox.ini $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH" +echo "Checking Pep8..." +pep8 --ignore=E402,E731,E241,W503,E226 --config=dev/tox.ini $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH" pep8_status="${PIPESTATUS[0]}" if [ "$compile_status" -eq 0 -a "$pep8_status" -eq 0 ]; then @@ -83,18 +115,35 @@ else rm "$PEP8_REPORT_PATH" fi +echo "Checking Pylint..." +for to_be_checked in "$PATHS_TO_CHECK"; do + [ $DEBUG == true ] && echo ".. $to_be_checked" + pylint --rcfile="$SPARK_ROOT_DIR/python/pylintrc" $to_be_checked >> "$PYLINT_REPORT_PATH" +done + +if [ "${PIPESTATUS[0]}" -ne 0 ]; then + lint_status=1 + echo "Pylint checks failed." + cat "$PYLINT_REPORT_PATH" +else + echo "Pylint checks passed." +fi + +rm "$PYLINT_REPORT_PATH" + # Check that the documentation builds acceptably, skip check if sphinx is not installed. if hash "$SPHINXBUILD" 2> /dev/null; then cd python/docs + echo "Building docs..." make clean # Treat warnings as errors so we stop correctly - SPHINXOPTS="-a -W" make html &> "$SPHINX_REPORT_PATH" || lint_status=1 + SPHINXOPTS="-a -W $SPHINX_ARGS" make html &> "$SPHINX_REPORT_PATH" || lint_status=1 if [ "$lint_status" -ne 0 ]; then echo "pydoc checks failed." cat "$SPHINX_REPORT_PATH" echo "re-running make html to print full warning list" make clean - SPHINXOPTS="-a" make html + SPHINXOPTS="-a $SPHINX_ARGS" make html rm "$SPHINX_REPORT_PATH" exit "$lint_status" else diff --git a/dev/requirements.txt b/dev/requirements.txt index bf042d22a8b47..749a1ebd9e4a2 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -1,3 +1,10 @@ +# Please keep this list ordered + jira==1.0.3 +numpy==1.11.1 +pep8==1.7.0 PyGithub==1.26.0 +pylint==1.6.4 +requests==2.11.1 +Sphinx==1.4.6 Unidecode==0.04.19 diff --git a/python/pylintrc b/python/pylintrc index 6a675770da69a..d901141d5006d 100644 --- a/python/pylintrc +++ b/python/pylintrc @@ -84,7 +84,87 @@ enable= # If you would like to improve the code quality of pyspark, remove any of these disabled errors # run ./dev/lint-python and see if the errors raised by pylint can be fixed. -disable=invalid-name,missing-docstring,protected-access,unused-argument,no-member,unused-wildcard-import,redefined-builtin,too-many-arguments,unused-variable,too-few-public-methods,bad-continuation,duplicate-code,redefined-outer-name,too-many-ancestors,import-error,superfluous-parens,unused-import,line-too-long,no-name-in-module,unnecessary-lambda,import-self,no-self-use,unidiomatic-typecheck,fixme,too-many-locals,cyclic-import,too-many-branches,bare-except,wildcard-import,dangerous-default-value,broad-except,too-many-public-methods,deprecated-lambda,anomalous-backslash-in-string,too-many-lines,reimported,too-many-statements,bad-whitespace,unpacking-non-sequence,too-many-instance-attributes,abstract-method,old-style-class,global-statement,attribute-defined-outside-init,arguments-differ,undefined-all-variable,no-init,useless-else-on-loop,super-init-not-called,notimplemented-raised,too-many-return-statements,pointless-string-statement,global-variable-undefined,bad-classmethod-argument,too-many-format-args,parse-error,no-self-argument,pointless-statement,undefined-variable,undefined-loop-variable +# Keep one item per line in the following block! +disable= + abstract-method, + anomalous-backslash-in-string, + arguments-differ, + attribute-defined-outside-init, + bad-classmethod-argument, + bad-continuation, + bad-super-call, + bad-whitespace, + bare-except, + broad-except, + consider-iterating-dictionary, + consider-using-enumerate, + cyclic-import, + dangerous-default-value, + deprecated-lambda, + deprecated-method, + duplicate-code, + eval-used, + exec-used, + fixme, + global-statement, + global-variable-undefined, + import-error, + import-self, + invalid-length-returned, + invalid-name, + line-too-long, + misplaced-comparison-constant, + missing-docstring, + no-init, + no-member, + no-name-in-module, + no-self-argument, + no-self-use, + notimplemented-raised, + old-style-class, + parse-error, + pointless-statement, + pointless-string-statement, + protected-access, + raising-bad-type, + redefined-builtin, + redefined-outer-name, + redefined-variable-type, + reimported, + super-init-not-called, + superfluous-parens, + too-few-public-methods, + too-many-ancestors, + too-many-arguments, + too-many-branches, + too-many-format-args, + too-many-instance-attributes, + too-many-lines, + too-many-locals, + too-many-public-methods, + too-many-return-statements, + too-many-statements, + trailing-newlines, + trailing-whitespace, + undefined-all-variable, + undefined-loop-variable, + undefined-variable, + ungrouped-imports, + unidiomatic-typecheck, + unnecessary-lambda, + unnecessary-pass, + unneeded-not, + unpacking-non-sequence, + unsubscriptable-object, + unused-argument, + unused-import, + unused-variable, + unused-wildcard-import, + used-before-assignment, + useless-else-on-loop, + wildcard-import, + wrong-import-order, + wrong-import-position, [REPORTS] @@ -126,9 +206,6 @@ notes=FIXME,XXX,TODO [BASIC] -# Required attributes for module, separated by a comma -required-attributes= - # List of builtins function names that should not be used, separated by a comma bad-functions= @@ -327,10 +404,6 @@ generated-members=REQUEST,acl_users,aq_parent [CLASSES] -# List of interface methods to ignore, separated by a comma. This is used for -# instance to not check methods defines in Zope's Interface base class. -ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by - # List of method names used to declare (i.e. assign) instance attributes. defining-attr-methods=__init__,__new__,setUp