Skip to content

Commit 5c8727d

Browse files
committed
[SPARK-19458][SQL] Load Hive jars from the local repo to which they have already been downloaded
1 parent c86a57f commit 5c8727d

File tree

3 files changed

+25
-14
lines changed

3 files changed

+25
-14
lines changed

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,6 @@ object SparkSubmit extends CommandLineUtils {
443443
OptionAssigner(args.deployMode, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
444444
sysProp = "spark.submit.deployMode"),
445445
OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"),
446-
OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.jars.ivy"),
447446
OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT,
448447
sysProp = "spark.driver.memory"),
449448
OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
@@ -480,7 +479,12 @@ object SparkSubmit extends CommandLineUtils {
480479
sysProp = "spark.driver.cores"),
481480
OptionAssigner(args.supervise.toString, STANDALONE | MESOS, CLUSTER,
482481
sysProp = "spark.driver.supervise"),
483-
OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, sysProp = "spark.jars.ivy")
482+
OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
483+
sysProp = "spark.jars.ivy"),
484+
OptionAssigner(args.repositories, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
485+
sysProp = "spark.jars.repositories"),
486+
OptionAssigner(args.sparkProperties.get("spark.jars.ivySettings").orNull,
487+
ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.jars.ivySettings")
484488
)
485489

486490
// In client mode, launch the application main class directly

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,6 @@ private[spark] object HiveUtils extends Logging {
320320
barrierPrefixes = hiveMetastoreBarrierPrefixes,
321321
sharedPrefixes = hiveMetastoreSharedPrefixes)
322322
} else if (hiveMetastoreJars == "maven") {
323-
// TODO: Support for loading the jars from an already downloaded location.
324323
logInfo(
325324
s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using maven.")
326325
IsolatedClientLoader.forVersion(

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ private[hive] object IsolatedClientLoader extends Logging {
5959
} else {
6060
val (downloadedFiles, actualHadoopVersion) =
6161
try {
62-
(downloadVersion(resolvedVersion, hadoopVersion, ivyPath), hadoopVersion)
62+
(downloadVersion(resolvedVersion, hadoopVersion, sparkConf, ivyPath), hadoopVersion)
6363
} catch {
6464
case e: RuntimeException if e.getMessage.contains("hadoop") =>
6565
// If the error message contains hadoop, it is probably because the hadoop
@@ -73,7 +73,7 @@ private[hive] object IsolatedClientLoader extends Logging {
7373
"It is recommended to set jars used by Hive metastore client through " +
7474
"spark.sql.hive.metastore.jars in the production environment.")
7575
sharesHadoopClasses = false
76-
(downloadVersion(resolvedVersion, "2.4.0", ivyPath), "2.4.0")
76+
(downloadVersion(resolvedVersion, "2.4.0", sparkConf, ivyPath), "2.4.0")
7777
}
7878
resolvedVersions.put((resolvedVersion, actualHadoopVersion), downloadedFiles)
7979
resolvedVersions((resolvedVersion, actualHadoopVersion))
@@ -102,28 +102,36 @@ private[hive] object IsolatedClientLoader extends Logging {
102102
private def downloadVersion(
103103
version: HiveVersion,
104104
hadoopVersion: String,
105+
sparkConf: SparkConf,
105106
ivyPath: Option[String]): Seq[URL] = {
106107
val hiveArtifacts = version.extraDeps ++
107108
Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde")
108109
.map(a => s"org.apache.hive:$a:${version.fullVersion}") ++
109110
Seq("com.google.guava:guava:14.0.1",
110111
s"org.apache.hadoop:hadoop-client:$hadoopVersion")
111112

113+
// if repositories contain a local repo, it will not download jars from remote repo
114+
val repos: Option[String] = Option(sparkConf.get("spark.jars.repositories")).map {
115+
repo =>
116+
Seq(repo, "http://www.datanucleus.org/downloads/maven2").mkString(",")
117+
}.orElse(Some("http://www.datanucleus.org/downloads/maven2"))
118+
119+
val ivyRepoPath = Option(sparkConf.get("spark.jars.ivy"))
120+
val ivySettings = Option(sparkConf.get("spark.jars.ivySettings")).map { ivySettingsFile =>
121+
SparkSubmitUtils.loadIvySettings(ivySettingsFile, repos, ivyRepoPath)
122+
}.getOrElse {
123+
SparkSubmitUtils.buildIvySettings(repos, ivyRepoPath)
124+
}
125+
112126
val classpath = quietly {
113127
SparkSubmitUtils.resolveMavenCoordinates(
114128
hiveArtifacts.mkString(","),
115-
SparkSubmitUtils.buildIvySettings(
116-
Some("http://www.datanucleus.org/downloads/maven2"),
117-
ivyPath),
129+
ivySettings,
118130
exclusions = version.exclusions)
119131
}
120-
val allFiles = classpath.split(",").map(new File(_)).toSet
121132

122-
// TODO: Remove copy logic.
123-
val tempDir = Utils.createTempDir(namePrefix = s"hive-${version}")
124-
allFiles.foreach(f => FileUtils.copyFileToDirectory(f, tempDir))
125-
logInfo(s"Downloaded metastore jars to ${tempDir.getCanonicalPath}")
126-
tempDir.listFiles().map(_.toURI.toURL)
133+
logInfo(s"Downloaded metastore jars location: $classpath")
134+
classpath.split(",").map(new File(_).toURI.toURL)
127135
}
128136

129137
// A map from a given pair of HiveVersion and Hadoop version to jar files.

0 commit comments

Comments (0)