Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions conf/spark-env.sh.template
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be under "Options read in YARN client mode"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added it there as well, but I'm going to leave it here because it's relevant also to yarn-cluster.

# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_CLASSPATH, default classpath entries to append
Expand All @@ -17,6 +18,7 @@
# - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos

# Options read in YARN client mode
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2)
# - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1)
# - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ private[spark] class SparkSubmitArguments(args: Array[String]) {
if (args.length == 0) printUsageAndExit(-1)
if (primaryResource == null) SparkSubmit.printErrorAndExit("Must specify a primary resource")
if (mainClass == null) SparkSubmit.printErrorAndExit("Must specify a main class with --class")

if (master.startsWith("yarn")) {
val hasHadoopEnv = sys.env.contains("HADOOP_CONF_DIR") || sys.env.contains("YARN_CONF_DIR")
val testing = sys.env.contains("SPARK_TESTING")
if (!hasHadoopEnv && !testing) {
throw new Exception(s"When running with master '$master' " +
"either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.")
}
}
}

override def toString = {
Expand Down
9 changes: 2 additions & 7 deletions docs/hadoop-third-party-distributions.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,5 @@ The location of these configuration files varies across CDH and HDP versions, bu
a common location is inside of `/etc/hadoop/conf`. Some tools, such as Cloudera Manager, create
configurations on-the-fly, but offer mechanisms to download copies of them.

There are a few ways to make these files visible to Spark:

* You can copy these files into `$SPARK_HOME/conf` and they will be included in Spark's
classpath automatically.
* If you are running Spark on the same nodes as Hadoop _and_ your distribution includes both
`hdfs-site.xml` and `core-site.xml` in the same directory, you can set `HADOOP_CONF_DIR`
in `$SPARK_HOME/spark-env.sh` to that directory.
To make these files visible to Spark, set `HADOOP_CONF_DIR` in
`$SPARK_HOME/conf/spark-env.sh` to a location containing the configuration files.
Original file line number Diff line number Diff line change
Expand Up @@ -375,9 +375,11 @@ object ClientBase {
val classpathEntries = Option(conf.getStrings(
YarnConfiguration.YARN_APPLICATION_CLASSPATH)).getOrElse(
getDefaultYarnApplicationClasspath())
for (c <- classpathEntries) {
YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, c.trim,
File.pathSeparator)
if (classpathEntries != null) {
for (c <- classpathEntries) {
YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, c.trim,
File.pathSeparator)
}
}

val mrClasspathEntries = Option(conf.getStrings(
Expand Down