4 changes: 3 additions & 1 deletion dev/create-release/generate-contributors.py
@@ -22,7 +22,9 @@
import re
import sys

from releaseutils import *
from releaseutils import tag_exists, raw_input, get_commits, yesOrNoPrompt, get_date, \
is_valid_author, capitalize_author, JIRA, find_components, translate_issue_type, \
translate_component, CORE_COMPONENT, contributors_file_name, nice_join

# You must set the following before use!
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
12 changes: 10 additions & 2 deletions dev/create-release/translate-contributors.py
@@ -31,7 +31,15 @@
import os
import sys

from releaseutils import *
from releaseutils import JIRA, JIRAError, get_jira_name, Github, get_github_name, \
contributors_file_name, is_valid_author, raw_input, capitalize_author, yesOrNoPrompt

try:
import unidecode
except ImportError:
print("This tool requires the unidecode library to decode obscure github usernames")
print("Install using 'sudo pip install unidecode'")
sys.exit(-1)
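
A short usage sketch of unidecode (the name below is made up), showing the kind of ASCII transliteration this script relies on:

from unidecode import unidecode

# Non-ASCII contributor names are transliterated to plain ASCII.
print(unidecode("Łukasz Gajowiak"))  # -> "Lukasz Gajowiak"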

# You must set the following before use!
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
@@ -135,7 +143,7 @@ def generate_candidates(author, issues):
# Note that the candidate name may already be in unicode (JIRA returns this)
for i, (candidate, source) in enumerate(candidates):
try:
candidate = unicode(candidate, "UTF-8")
candidate = unicode(candidate, "UTF-8") # noqa: F821
except TypeError:
# already in unicode
pass
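
The # noqa: F821 above is needed presumably because flake8 runs under Python 3, where the unicode builtin no longer exists, while the script keeps its Python 2 idiom. A hedged sketch of a version-neutral alternative (not what this PR does):

def ensure_text(candidate):
    # Decode only if we were handed raw bytes; JIRA may already return text.
    if isinstance(candidate, bytes):
        return candidate.decode("utf-8")
    return candidate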
2 changes: 1 addition & 1 deletion dev/tox.ini
@@ -19,6 +19,6 @@ max-line-length=100
exclude=python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*

[flake8]
select = E901,E999,F821,F822,F823,F401
select = E901,E999,F821,F822,F823,F401,F405
exclude = python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*
max-line-length = 100
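
For context, a minimal file (hypothetical, not part of the PR) that the newly selected F405 check flags, since the linter cannot tell where a star-imported name comes from:

# star_demo.py -- flake8 reports roughly:
#   F405 'sqrt' may be undefined, or defined from star imports: math
from math import *

print(sqrt(16.0))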
2 changes: 1 addition & 1 deletion examples/src/main/python/sql/basic.py
@@ -30,7 +30,7 @@

# $example on:programmatic_schema$
# Import data types
from pyspark.sql.types import *
from pyspark.sql.types import StringType, StructType, StructField
# $example off:programmatic_schema$


2 changes: 1 addition & 1 deletion python/pyspark/__init__.py
@@ -53,12 +53,12 @@
from pyspark.context import SparkContext
from pyspark.rdd import RDD, RDDBarrier
from pyspark.files import SparkFiles
from pyspark.status import StatusTracker, SparkJobInfo, SparkStageInfo
from pyspark.util import InheritableThread
from pyspark.storagelevel import StorageLevel
from pyspark.accumulators import Accumulator, AccumulatorParam
from pyspark.broadcast import Broadcast
from pyspark.serializers import MarshalSerializer, PickleSerializer
from pyspark.status import *
from pyspark.taskcontext import TaskContext, BarrierTaskContext, BarrierTaskInfo
from pyspark.profiler import Profiler, BasicProfiler
from pyspark.version import __version__ # noqa: F401
3 changes: 2 additions & 1 deletion python/pyspark/ml/base.py
@@ -21,8 +21,9 @@
import threading

from pyspark import since
from pyspark.ml.param.shared import *
from pyspark.ml.common import inherit_doc
from pyspark.ml.param.shared import HasInputCol, HasOutputCol, HasLabelCol, HasFeaturesCol, \
HasPredictionCol, Params
from pyspark.sql.functions import udf
from pyspark.sql.types import StructField, StructType

11 changes: 8 additions & 3 deletions python/pyspark/ml/classification.py
@@ -16,19 +16,24 @@
#

import operator
import sys
import uuid
Member:
@Fokko, do you mind opening a minor PR to add this uuid import to all other branches? It looks like it wasn't there from the very first place: Fokko@90b46e0

I checked the other occurrences fixed here, and the other ones look fine to not fix in other branches.

Member:
Oh @Fokko, sorry, I misread. It was fine because we're using a wildcard import, from pyspark.ml.util import *, and util imports uuid, so it didn't crash before. Let's just not port this back alone :-)
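
For illustration, a minimal two-file sketch (module names are stand-ins, not Spark code) of how a wildcard import can leak a transitive import such as uuid into the importing module:

# util.py -- stand-in for pyspark.ml.util, which imports uuid for its own use
import uuid

def random_uid():
    return uuid.uuid4().hex

# classification.py -- stand-in with no import uuid of its own
from util import *  # with no __all__ in util.py, this also binds the name uuid here

print(uuid.uuid4())  # works, but only because of the wildcard import above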

import warnings
from abc import ABCMeta, abstractmethod, abstractproperty
from multiprocessing.pool import ThreadPool

from pyspark import keyword_only
from pyspark import keyword_only, since, SparkContext
from pyspark.ml import Estimator, Predictor, PredictionModel, Model
from pyspark.ml.param.shared import *
from pyspark.ml.param.shared import HasRawPredictionCol, HasProbabilityCol, HasThresholds, \
HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, \
HasAggregationDepth, HasThreshold, HasBlockSize, Param, Params, TypeConverters, \
HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism
from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \
_TreeEnsembleModel, _RandomForestParams, _GBTParams, \
_HasVarianceImpurity, _TreeClassifierParams
from pyspark.ml.regression import _FactorizationMachinesParams, DecisionTreeRegressionModel
from pyspark.ml.util import *
from pyspark.ml.base import _PredictorParams
from pyspark.ml.util import JavaMLWritable, JavaMLReadable, HasTrainingSummary
from pyspark.ml.wrapper import JavaParams, \
JavaPredictor, JavaPredictionModel, JavaWrapper
from pyspark.ml.common import inherit_doc, _java2py, _py2java
7 changes: 5 additions & 2 deletions python/pyspark/ml/clustering.py
@@ -19,9 +19,12 @@
import warnings

from pyspark import since, keyword_only
from pyspark.ml.util import *
from pyspark.ml.param.shared import HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionCol, \
HasAggregationDepth, HasWeightCol, HasTol, HasProbabilityCol, HasBlockSize, \
HasDistanceMeasure, HasCheckpointInterval, Param, Params, TypeConverters
from pyspark.ml.util import JavaMLWritable, JavaMLReadable, GeneralJavaMLWritable, \
HasTrainingSummary, SparkContext
from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaWrapper
from pyspark.ml.param.shared import *
from pyspark.ml.common import inherit_doc, _java2py
from pyspark.ml.stat import MultivariateGaussian
from pyspark.sql import DataFrame
4 changes: 3 additions & 1 deletion python/pyspark/ml/feature.py
@@ -17,7 +17,9 @@

from pyspark import since, keyword_only, SparkContext
from pyspark.ml.linalg import _convert_to_vector
from pyspark.ml.param.shared import *
from pyspark.ml.param.shared import HasThreshold, HasThresholds, HasInputCol, HasOutputCol, \
HasInputCols, HasOutputCols, HasHandleInvalid, HasRelativeError, HasFeaturesCol, HasLabelCol, \
HasSeed, HasNumFeatures, HasStepSize, HasMaxIter, TypeConverters, Param, Params
from pyspark.ml.util import JavaMLReadable, JavaMLWritable
from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaTransformer, _jvm
from pyspark.ml.common import inherit_doc
8 changes: 5 additions & 3 deletions python/pyspark/ml/fpm.py
@@ -15,11 +15,13 @@
# limitations under the License.
#

from pyspark import keyword_only
import sys
Contributor Author:
This import was missing as well.


from pyspark import keyword_only, since
from pyspark.sql import DataFrame
from pyspark.ml.util import *
from pyspark.ml.util import JavaMLWritable, JavaMLReadable
from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams
from pyspark.ml.param.shared import *
from pyspark.ml.param.shared import HasPredictionCol, Param, TypeConverters, Params

__all__ = ["FPGrowth", "FPGrowthModel", "PrefixSpan"]

6 changes: 4 additions & 2 deletions python/pyspark/ml/pipeline.py
@@ -14,11 +14,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
Contributor Author:
Missing import.


from pyspark import keyword_only
from pyspark import keyword_only, since, SparkContext
from pyspark.ml.base import Estimator, Model, Transformer
from pyspark.ml.param import Param, Params
from pyspark.ml.util import *
from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader, \
DefaultParamsReader, DefaultParamsWriter, MLWriter, MLReader, JavaMLWritable
from pyspark.ml.wrapper import JavaParams, JavaWrapper
from pyspark.ml.common import inherit_doc, _java2py, _py2java

6 changes: 4 additions & 2 deletions python/pyspark/ml/recommendation.py
@@ -18,10 +18,12 @@
import sys

from pyspark import since, keyword_only
from pyspark.ml.util import *
from pyspark.ml.param.shared import HasPredictionCol, HasBlockSize, HasMaxIter, HasRegParam, \
HasCheckpointInterval, HasSeed
from pyspark.ml.wrapper import JavaEstimator, JavaModel
from pyspark.ml.param.shared import *
from pyspark.ml.common import inherit_doc
from pyspark.ml.param import Params, TypeConverters, Param
from pyspark.ml.util import JavaMLWritable, JavaMLReadable


__all__ = ['ALS', 'ALSModel']
12 changes: 9 additions & 3 deletions python/pyspark/ml/regression.py
@@ -15,15 +15,21 @@
# limitations under the License.
#

import sys

from abc import ABCMeta

from pyspark import keyword_only
from pyspark import keyword_only, since
from pyspark.ml import Predictor, PredictionModel
from pyspark.ml.base import _PredictorParams
from pyspark.ml.param.shared import *
from pyspark.ml.param.shared import HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol, \
Param, Params, TypeConverters, HasMaxIter, HasTol, HasFitIntercept, HasAggregationDepth, \
HasBlockSize, HasRegParam, HasSolver, HasStepSize, HasSeed, HasElasticNetParam, \
HasStandardization, HasLoss, HasVarianceCol
from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \
_TreeEnsembleModel, _RandomForestParams, _GBTParams, _TreeRegressorParams
from pyspark.ml.util import *
from pyspark.ml.util import JavaMLWritable, JavaMLReadable, HasTrainingSummary, \
GeneralJavaMLWritable
from pyspark.ml.wrapper import JavaEstimator, JavaModel, \
JavaPredictor, JavaPredictionModel, JavaWrapper
from pyspark.ml.common import inherit_doc
6 changes: 4 additions & 2 deletions python/pyspark/ml/tree.py
@@ -15,8 +15,10 @@
# limitations under the License.
#

from pyspark.ml.param.shared import *
from pyspark.ml.util import *
from pyspark import since
from pyspark.ml.param import Params
from pyspark.ml.param.shared import HasCheckpointInterval, HasSeed, HasWeightCol, Param, \
TypeConverters, HasMaxIter, HasStepSize, HasValidationIndicatorCol
from pyspark.ml.wrapper import JavaPredictionModel
from pyspark.ml.common import inherit_doc

6 changes: 4 additions & 2 deletions python/pyspark/ml/tuning.py
@@ -14,17 +14,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import sys
import itertools
from multiprocessing.pool import ThreadPool

import numpy as np

from pyspark import keyword_only
from pyspark import keyword_only, since, SparkContext
from pyspark.ml import Estimator, Model
from pyspark.ml.common import _py2java, _java2py
from pyspark.ml.param import Params, Param, TypeConverters
from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed
from pyspark.ml.util import *
from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader
from pyspark.ml.wrapper import JavaParams
from pyspark.sql.functions import col, lit, rand, UserDefinedFunction
from pyspark.sql.types import BooleanType
2 changes: 1 addition & 1 deletion python/pyspark/mllib/stat/__init__.py
@@ -19,7 +19,7 @@
Python package for statistical functions in MLlib.
"""

from pyspark.mllib.stat._statistics import *
from pyspark.mllib.stat._statistics import Statistics, MultivariateStatisticalSummary
from pyspark.mllib.stat.distribution import MultivariateGaussian
from pyspark.mllib.stat.test import ChiSqTestResult
from pyspark.mllib.stat.KernelDensity import KernelDensity
2 changes: 1 addition & 1 deletion python/pyspark/sql/column.py
@@ -21,7 +21,7 @@

from pyspark import copy_func, since
from pyspark.context import SparkContext
from pyspark.sql.types import *
from pyspark.sql.types import DataType, StructField, StructType, IntegerType, StringType

__all__ = ["Column"]

2 changes: 1 addition & 1 deletion python/pyspark/sql/dataframe.py
@@ -31,7 +31,7 @@
from pyspark.sql.column import Column, _to_seq, _to_list, _to_java_column
from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2
from pyspark.sql.streaming import DataStreamWriter
from pyspark.sql.types import *
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from pyspark.sql.pandas.conversion import PandasConversionMixin
from pyspark.sql.pandas.map_ops import PandasMapOpsMixin

2 changes: 1 addition & 1 deletion python/pyspark/sql/group.py
@@ -21,7 +21,7 @@
from pyspark.sql.column import Column, _to_seq
from pyspark.sql.dataframe import DataFrame
from pyspark.sql.pandas.group_ops import PandasGroupedOpsMixin
from pyspark.sql.types import *
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

__all__ = ["GroupedData"]

3 changes: 2 additions & 1 deletion python/pyspark/sql/pandas/conversion.py
@@ -22,7 +22,8 @@
from pyspark.rdd import _load_from_socket
from pyspark.sql.pandas.serializers import ArrowCollectSerializer
from pyspark.sql.types import IntegralType
from pyspark.sql.types import *
from pyspark.sql.types import ByteType, ShortType, IntegerType, LongType, FloatType, \
DoubleType, BooleanType, TimestampType, StructType, DataType
from pyspark.traceback_utils import SCCallSiteSync


4 changes: 3 additions & 1 deletion python/pyspark/sql/pandas/types.py
@@ -20,7 +20,9 @@
pandas instances during the type conversion.
"""

from pyspark.sql.types import *
from pyspark.sql.types import ByteType, ShortType, IntegerType, LongType, FloatType, \
DoubleType, DecimalType, StringType, BinaryType, DateType, TimestampType, ArrayType, \
StructType, StructField, BooleanType


def to_arrow_type(dt):
4 changes: 2 additions & 2 deletions python/pyspark/sql/readwriter.py
@@ -14,12 +14,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys

from py4j.java_gateway import JavaClass

from pyspark import RDD, since
from pyspark.sql.column import _to_seq, _to_java_column
from pyspark.sql.types import *
from pyspark.sql.types import StructType
from pyspark.sql import utils
from pyspark.sql.utils import to_str

@@ -1225,7 +1226,6 @@ def overwrite(self, condition):
Overwrite rows matching the given filter condition with the contents of the data frame in
the output table.
"""
condition = _to_java_column(column)
self._jwriter.overwrite(condition)

@since(3.1)
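
The removed line assigned condition from an undefined name, column, which is exactly what F821 catches; the condition parameter the caller passes is already a Column. A hedged usage sketch of this v2 writer method (catalog and table names are made up):

from pyspark.sql import SparkSession
from pyspark.sql.functions import col

spark = SparkSession.builder.getOrCreate()
df = spark.range(10).withColumn("part", col("id") % 2)

# Replace only the rows of the target table that match the condition.
df.writeTo("demo_catalog.db.events").overwrite(col("part") == 0)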
6 changes: 3 additions & 3 deletions python/pyspark/sql/streaming.py
@@ -23,7 +23,7 @@
from pyspark import since, keyword_only
from pyspark.sql.column import _to_seq
from pyspark.sql.readwriter import OptionUtils, to_str
from pyspark.sql.types import *
from pyspark.sql.types import StructType, StructField, StringType
from pyspark.sql.utils import ForeachBatchFunction, StreamingQueryException

__all__ = ["StreamingQuery", "StreamingQueryManager", "DataStreamReader", "DataStreamWriter"]
@@ -1239,8 +1239,8 @@ def _test():
globs = pyspark.sql.streaming.__dict__.copy()
try:
spark = SparkSession.builder.getOrCreate()
except py4j.protocol.Py4JError:
spark = SparkSession(sc)
except py4j.protocol.Py4JError: # noqa: F821
Member:
This was failing?

Contributor Author:
I tried to run it, and it seems to run:

MacBook-Pro-van-Fokko:spark fokkodriesprong$ python3 ./python/pyspark/sql/streaming.py
20/08/28 11:41:27 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
20/08/28 11:41:33 WARN StreamingQueryManager: Temporary checkpoint location created which is deleted normally when the query didn't fail: /private/var/folders/km/xypq2kxs4ys3dt6bwtd4fbj00000gn/T/temporary-ca2e7725-cb1f-4ac9-adc6-5533ab36db58. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
20/08/28 11:41:34 WARN StreamingQueryManager: Temporary checkpoint location created which is deleted normally when the query didn't fail: /private/var/folders/km/xypq2kxs4ys3dt6bwtd4fbj00000gn/T/temporary-0c6bf4bd-795f-4a82-ae4c-72f2cd7751ab. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
20/08/28 11:41:34 WARN Shell: Interrupted while joining on: Thread[Thread-48,5,]
java.lang.InterruptedException
	at java.lang.Object.wait(Native Method)
	at java.lang.Thread.join(Thread.java:1252)
	at java.lang.Thread.join(Thread.java:1326)
	at org.apache.hadoop.util.Shell.joinThread(Shell.java:629)
	at org.apache.hadoop.util.Shell.runCommand(Shell.java:580)
	at org.apache.hadoop.util.Shell.run(Shell.java:482)
	at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:776)
	at org.apache.hadoop.util.Shell.execCommand(Shell.java:869)
	at org.apache.hadoop.util.Shell.execCommand(Shell.java:852)
	at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
	at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
	at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
	at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
	at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
	at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
	at org.apache.hadoop.fs.FileSystem.primitiveCreate(FileSystem.java:1017)
	at org.apache.hadoop.fs.DelegateToFileSystem.createInternal(DelegateToFileSystem.java:100)
	at org.apache.hadoop.fs.ChecksumFs$ChecksumFSOutputSummer.<init>(ChecksumFs.java:353)
	at org.apache.hadoop.fs.ChecksumFs.createInternal(ChecksumFs.java:400)
	at org.apache.hadoop.fs.AbstractFileSystem.create(AbstractFileSystem.java:596)
	at org.apache.hadoop.fs.FileContext$3.next(FileContext.java:686)
	at org.apache.hadoop.fs.FileContext$3.next(FileContext.java:682)
	at org.apache.hadoop.fs.FSLinkResolver.resolve(FSLinkResolver.java:90)
	at org.apache.hadoop.fs.FileContext.create(FileContext.java:688)
	at org.apache.spark.sql.execution.streaming.FileContextBasedCheckpointFileManager.createTempFile(CheckpointFileManager.scala:310)
	at org.apache.spark.sql.execution.streaming.CheckpointFileManager$RenameBasedFSDataOutputStream.<init>(CheckpointFileManager.scala:133)
	at org.apache.spark.sql.execution.streaming.CheckpointFileManager$RenameBasedFSDataOutputStream.<init>(CheckpointFileManager.scala:136)
	at org.apache.spark.sql.execution.streaming.FileContextBasedCheckpointFileManager.createAtomic(CheckpointFileManager.scala:316)
	at org.apache.spark.sql.execution.streaming.HDFSMetadataLog.writeBatchToFile(HDFSMetadataLog.scala:131)
	at org.apache.spark.sql.execution.streaming.HDFSMetadataLog.$anonfun$add$3(HDFSMetadataLog.scala:120)
	at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
	at scala.Option.getOrElse(Option.scala:189)
	at org.apache.spark.sql.execution.streaming.HDFSMetadataLog.add(HDFSMetadataLog.scala:118)
	at org.apache.spark.sql.execution.streaming.CompactibleFileStreamLog.add(CompactibleFileStreamLog.scala:158)
	at org.apache.spark.sql.execution.streaming.FileStreamSourceLog.add(FileStreamSourceLog.scala:69)
	at org.apache.spark.sql.execution.streaming.FileStreamSource.fetchMaxOffset(FileStreamSource.scala:150)
	at org.apache.spark.sql.execution.streaming.FileStreamSource.latestOffset(FileStreamSource.scala:286)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$constructNextBatch$3(MicroBatchExecution.scala:380)
	at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:352)
	at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:350)
	at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$constructNextBatch$2(MicroBatchExecution.scala:371)
	at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
	at scala.collection.immutable.Map$Map1.foreach(Map.scala:128)
	at scala.collection.TraversableLike.map(TraversableLike.scala:238)
	at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
	at scala.collection.AbstractTraversable.map(Traversable.scala:108)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$constructNextBatch$1(MicroBatchExecution.scala:368)
	at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.withProgressLocked(MicroBatchExecution.scala:598)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.constructNextBatch(MicroBatchExecution.scala:364)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$2(MicroBatchExecution.scala:208)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:352)
	at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:350)
	at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1(MicroBatchExecution.scala:191)
	at org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:57)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:185)
	at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:334)
	at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:245)
20/08/28 11:41:36 WARN StreamingQueryManager: Temporary checkpoint location created which is deleted normally when the query didn't fail: /private/var/folders/km/xypq2kxs4ys3dt6bwtd4fbj00000gn/T/temporary-3c27fa5e-7644-437d-9832-cac5168d4af5. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
20/08/28 11:41:36 WARN StreamingQueryManager: Temporary checkpoint location created which is deleted normally when the query didn't fail: /private/var/folders/km/xypq2kxs4ys3dt6bwtd4fbj00000gn/T/temporary-5d2c3db7-4d59-4e0b-953e-429a9d398ef2. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
20/08/28 11:41:36 WARN Shell: Interrupted while joining on: Thread[Thread-110,5,]
java.lang.InterruptedException
	at java.lang.Object.wait(Native Method)
	at java.lang.Thread.join(Thread.java:1252)
	at java.lang.Thread.join(Thread.java:1326)
	at org.apache.hadoop.util.Shell.joinThread(Shell.java:629)
	at org.apache.hadoop.util.Shell.runCommand(Shell.java:580)
	at org.apache.hadoop.util.Shell.run(Shell.java:482)
	at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:776)
	at org.apache.hadoop.util.Shell.execCommand(Shell.java:869)
	at org.apache.hadoop.util.Shell.execCommand(Shell.java:852)
	at org.apache.hadoop.fs.FileUtil.readLink(FileUtil.java:160)
	at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileLinkStatusInternal(RawLocalFileSystem.java:837)
	at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:826)
	at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatus(RawLocalFileSystem.java:797)
	at org.apache.hadoop.fs.DelegateToFileSystem.getFileLinkStatus(DelegateToFileSystem.java:131)
	at org.apache.hadoop.fs.AbstractFileSystem.renameInternal(AbstractFileSystem.java:717)
	at org.apache.hadoop.fs.FilterFs.renameInternal(FilterFs.java:240)
	at org.apache.hadoop.fs.AbstractFileSystem.rename(AbstractFileSystem.java:690)
	at org.apache.hadoop.fs.FileContext.rename(FileContext.java:958)
	at org.apache.spark.sql.execution.streaming.FileContextBasedCheckpointFileManager.renameTempFile(CheckpointFileManager.scala:329)
	at org.apache.spark.sql.execution.streaming.CheckpointFileManager$RenameBasedFSDataOutputStream.close(CheckpointFileManager.scala:147)
	at org.apache.spark.sql.execution.streaming.HDFSMetadataLog.writeBatchToFile(HDFSMetadataLog.scala:134)
	at org.apache.spark.sql.execution.streaming.HDFSMetadataLog.$anonfun$add$3(HDFSMetadataLog.scala:120)
	at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
	at scala.Option.getOrElse(Option.scala:189)
	at org.apache.spark.sql.execution.streaming.HDFSMetadataLog.add(HDFSMetadataLog.scala:118)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$constructNextBatch$12(MicroBatchExecution.scala:418)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:352)
	at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:350)
	at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$constructNextBatch$1(MicroBatchExecution.scala:416)
	at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.withProgressLocked(MicroBatchExecution.scala:598)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.constructNextBatch(MicroBatchExecution.scala:364)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$2(MicroBatchExecution.scala:208)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:352)
	at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:350)
	at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1(MicroBatchExecution.scala:191)
	at org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:57)
	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:185)
	at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:334)
	at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:245)
20/08/28 11:41:37 WARN StreamingQueryManager: Temporary checkpoint location created which is deleted normally when the query didn't fail: /private/var/folders/km/xypq2kxs4ys3dt6bwtd4fbj00000gn/T/temporary-be1a0476-b88e-4b75-ac37-0486dbfe5a2e. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
MacBook-Pro-van-Fokko:spark fokkodriesprong$ echo $?
0

I was under the impression that py4j and sc were pulled in by the line above, i.e. from some global context, but this isn't the case. It seems to run, but maybe it just never hits the except. It looks like some lingering test code; maybe move this to the tests directory?

cc @zsxwing @tdas
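
A minimal sketch (names are made up) of why the script can still exit 0 while flake8 flags F821 here: the undefined name only matters if the except branch is actually taken at runtime:

def _bootstrap():
    try:
        session = "created via the builder"
    except RuntimeError:
        session = fallback_session  # noqa: F821 -- NameError if this branch ever runs
    return session

print(_bootstrap())  # prints the happy-path value; the broken branch is never executed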

spark = SparkSession(sc) # noqa: F821

globs['tempfile'] = tempfile
globs['os'] = os
6 changes: 4 additions & 2 deletions python/pyspark/sql/tests/test_arrow.py
@@ -25,7 +25,9 @@
from pyspark import SparkContext, SparkConf
from pyspark.sql import Row, SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import *
from pyspark.sql.types import StructType, StringType, IntegerType, LongType, \
FloatType, DoubleType, DecimalType, DateType, TimestampType, BinaryType, StructField, MapType, \
ArrayType
from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \
pandas_requirement_message, pyarrow_requirement_message
from pyspark.testing.utils import QuietTest
@@ -495,7 +497,7 @@ def conf(cls):


if __name__ == "__main__":
from pyspark.sql.tests.test_arrow import *
from pyspark.sql.tests.test_arrow import * # noqa: F401

try:
import xmlrunner
2 changes: 1 addition & 1 deletion python/pyspark/sql/tests/test_column.py
@@ -17,7 +17,7 @@
#

from pyspark.sql import Column, Row
from pyspark.sql.types import *
from pyspark.sql.types import StructType, StructField, LongType
from pyspark.sql.utils import AnalysisException
from pyspark.testing.sqlutils import ReusedSQLTestCase

2 changes: 1 addition & 1 deletion python/pyspark/sql/tests/test_context.py
@@ -25,7 +25,7 @@

from pyspark import SparkContext, SQLContext
from pyspark.sql import Row, SparkSession
from pyspark.sql.types import *
from pyspark.sql.types import StructType, StringType, StructField
from pyspark.sql.window import Window
from pyspark.testing.utils import ReusedPySparkTestCase

5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_dataframe.py
@@ -23,7 +23,8 @@
import unittest

from pyspark.sql import SparkSession, Row
from pyspark.sql.types import *
from pyspark.sql.types import StringType, IntegerType, DoubleType, StructType, StructField, \
BooleanType, DateType, TimestampType, FloatType
from pyspark.sql.utils import AnalysisException, IllegalArgumentException
from pyspark.testing.sqlutils import ReusedSQLTestCase, SQLTestUtils, have_pyarrow, have_pandas, \
pandas_requirement_message, pyarrow_requirement_message
@@ -903,7 +904,7 @@ def test_query_execution_listener_on_collect_with_arrow(self):


if __name__ == "__main__":
from pyspark.sql.tests.test_dataframe import *
from pyspark.sql.tests.test_dataframe import * # noqa: F401

try:
import xmlrunner