Skip to content

Commit c3c45cb

Browse files
ueshin
authored and cloud-fan committed
[SPARK-25540][SQL][PYSPARK] Make HiveContext in PySpark behave as the same as Scala.
## What changes were proposed in this pull request? In Scala, `HiveContext` sets a config `spark.sql.catalogImplementation` of the given `SparkContext` and then passes to `SparkSession.builder`. The `HiveContext` in PySpark should behave as the same as Scala. ## How was this patch tested? Existing tests. Closes #22552 from ueshin/issues/SPARK-25540/hive_context. Authored-by: Takuya UESHIN <ueshin@databricks.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent d0990e3 commit c3c45cb

File tree

2 files changed

+16
-6
lines changed

2 files changed

+16
-6
lines changed

python/pyspark/sql/context.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -485,7 +485,8 @@ def __init__(self, sparkContext, jhiveContext=None):
485485
"SparkSession.builder.enableHiveSupport().getOrCreate() instead.",
486486
DeprecationWarning)
487487
if jhiveContext is None:
488-
sparkSession = SparkSession.builder.enableHiveSupport().getOrCreate()
488+
sparkContext._conf.set("spark.sql.catalogImplementation", "hive")
489+
sparkSession = SparkSession.builder._sparkContext(sparkContext).getOrCreate()
489490
else:
490491
sparkSession = SparkSession(sparkContext, jhiveContext.sparkSession())
491492
SQLContext.__init__(self, sparkContext, sparkSession, jhiveContext)

python/pyspark/sql/session.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@ class Builder(object):
8383

8484
_lock = RLock()
8585
_options = {}
86+
_sc = None
8687

8788
@since(2.0)
8889
def config(self, key=None, value=None, conf=None):
@@ -139,6 +140,11 @@ def enableHiveSupport(self):
139140
"""
140141
return self.config("spark.sql.catalogImplementation", "hive")
141142

143+
def _sparkContext(self, sc):
144+
with self._lock:
145+
self._sc = sc
146+
return self
147+
142148
@since(2.0)
143149
def getOrCreate(self):
144150
"""Gets an existing :class:`SparkSession` or, if there is no existing one, creates a
@@ -167,11 +173,14 @@ def getOrCreate(self):
167173
from pyspark.conf import SparkConf
168174
session = SparkSession._instantiatedSession
169175
if session is None or session._sc._jsc is None:
170-
sparkConf = SparkConf()
171-
for key, value in self._options.items():
172-
sparkConf.set(key, value)
173-
sc = SparkContext.getOrCreate(sparkConf)
174-
# This SparkContext may be an existing one.
176+
if self._sc is not None:
177+
sc = self._sc
178+
else:
179+
sparkConf = SparkConf()
180+
for key, value in self._options.items():
181+
sparkConf.set(key, value)
182+
sc = SparkContext.getOrCreate(sparkConf)
183+
# This SparkContext may be an existing one.
175184
for key, value in self._options.items():
176185
# we need to propagate the confs
177186
# before we create the SparkSession. Otherwise, confs like

0 commit comments

Comments (0)