25 changes: 25 additions & 0 deletions core/pom.xml
@@ -357,6 +357,31 @@
<groupId>org.apache.commons</groupId>
<artifactId>commons-crypto</artifactId>
</dependency>

<!--
Testing Hive reflection needs hive on the test classpath only.
It doesn't need the spark hive modules, so the -Phive flag is not checked.
-->
<dependency>
<groupId>${hive.group}</groupId>
<artifactId>hive-exec</artifactId>
Author (@mgummelt), Apr 19, 2017:

I have a problem here that I could use some help on.

hive-exec has a dependency on pentaho that doesn't exist in maven central anymore. This is why we have an entry for it in spark-parent's dependencyManagement section that excludes pentaho via calcite-core.

But the scope in spark-parent's dependencyManagement entry for hive-exec is compile, so if I make it test here, which is what it was in YARN, then it fails to match that entry anymore, and thus doesn't inherit the excludes. This shouldn't be a problem, because dependencies in the test scope aren't transitive, so we should never be trying to fetch the missing pentaho dependency. But sbt builds don't seem to recognize this transitivity, so if I add the test scope, I get this error:

[warn] 	Note: Unresolved dependencies path:
[warn] 		org.pentaho:pentaho-aggdesigner-algorithm:5.1.5-jhyde
[warn] 		  +- org.apache.calcite:calcite-core:1.2.0-incubating ((com.typesafe.sbt.pom.MavenHelper) MavenHelper.scala#L76)
[warn] 		  +- org.spark-project.hive:hive-exec:1.2.1.spark2 ((com.typesafe.sbt.pom.MavenHelper) MavenHelper.scala#L76)
[warn] 		  +- org.apache.spark:spark-core_2.11:2.2.0-SNAPSHOT ((com.typesafe.sbt.pom.MavenHelper) MavenHelper.scala#L76)
[warn] 		  +- org.apache.spark:spark-catalyst_2.11:2.2.0-SNAPSHOT ((com.typesafe.sbt.pom.MavenHelper) MavenHelper.scala#L76)
[warn] 		  +- org.apache.spark:spark-sql_2.11:2.2.0-SNAPSHOT ((com.typesafe.sbt.pom.MavenHelper) MavenHelper.scala#L76)
[warn] 		  +- org.apache.spark:spark-hive_2.11:2.2.0-SNAPSHOT

What I can't understand is how this wasn't a problem when these dependencies were in YARN. For some reason sbt dependencyTree on master shows that calcite-core is excluded from the hive-exec dependency tree, but the same command on this PR tries to resolve calcite-core.

Let me know if you have any ideas here. My best idea right now is to just try to duplicate the excludes in spark-parent.
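
For illustration only, a minimal sketch of how such an exclusion could be expressed on the sbt side (hypothetical, not part of this PR; the coordinates are taken from the warning above, and Spark's sbt build is generated from the POMs, so the real fix would more likely live in spark-parent's dependencyManagement):

```scala
// Hypothetical build.sbt fragment: globally exclude the pentaho artifact that
// calcite-core (via hive-exec) drags in, since it can no longer be resolved
// from Maven Central.
excludeDependencies += ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm")
```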

<scope>test</scope>
</dependency>
<dependency>
<groupId>${hive.group}</groupId>
<artifactId>hive-metastore</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libfb303</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
@@ -0,0 +1,3 @@
org.apache.spark.deploy.security.HadoopFSCredentialProvider
org.apache.spark.deploy.security.HBaseCredentialProvider
org.apache.spark.deploy.security.HiveCredentialProvider
18 changes: 15 additions & 3 deletions core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -30,6 +30,7 @@ import scala.util.Properties
import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.fs.Path
import org.apache.hadoop.security.UserGroupInformation
import org.apache.hadoop.yarn.conf.YarnConfiguration
Contributor:

Hmm... you're adding a YARN dependency in core, which should be able to build without YARN... but you're not actually adding a new dependency to the POM, so I guess the dependency is already there indirectly.

Author:

Yea it looks like this should require hadoop-yarn-api. I'll add this dependency to core unless you object, in which case I suppose I could just use the raw string instead of YarnConfiguration.RM_PRINCIPAL, but that seems hacky.

FYI, we originally talked about placing the code below into the Mesos scheduler, but that seems to be too late. The SparkContext constructor creates a copy of the configuration, so we need to set the required YARN property before SparkContext is created, which means before user code gets run, which probably means somewhere in SparkSubmit.

Contributor:

I don't think you need the explicit dependency (otherwise this would not be compiling). This is probably being brought by hadoop-client already.

Author:

Better to explicitly declare a dependency rather than rely on transitivity, right?

import org.apache.ivy.Ivy
import org.apache.ivy.core.LogOptions
import org.apache.ivy.core.module.descriptor._
@@ -45,6 +46,7 @@ import org.apache.ivy.plugins.resolver.{ChainResolver, FileSystemResolver, IBibl
import org.apache.spark._
import org.apache.spark.api.r.RUtils
import org.apache.spark.deploy.rest._
import org.apache.spark.internal.Logging
import org.apache.spark.launcher.SparkLauncher
import org.apache.spark.util._

@@ -63,7 +65,7 @@ private[deploy] object SparkSubmitAction extends Enumeration {
* This program handles setting up the classpath with relevant Spark dependencies and provides
* a layer over the different cluster managers and deploy modes that Spark supports.
*/
object SparkSubmit extends CommandLineUtils {
object SparkSubmit extends CommandLineUtils with Logging {

// Cluster managers
private val YARN = 1
@@ -564,12 +566,22 @@ object SparkSubmit extends CommandLineUtils {
// properties and then loaded by SparkConf
sysProps.put("spark.yarn.keytab", args.keytab)
sysProps.put("spark.yarn.principal", args.principal)

UserGroupInformation.loginUserFromKeytab(args.principal, args.keytab)
}
}
}


// [SPARK-20328]. HadoopRDD calls into a Hadoop library that fetches delegation tokens with
// renewer set to the YARN ResourceManager. Since YARN isn't configured in Mesos mode, we
// must trick it into thinking we're YARN.
if (clusterManager == MESOS && UserGroupInformation.isSecurityEnabled) {
val shortUserName = UserGroupInformation.getCurrentUser.getShortUserName
val key = s"spark.hadoop.${YarnConfiguration.RM_PRINCIPAL}"
Contributor:

Does this work in the user impersonation scenario? Here shortUserName is the real user, while HadoopRDD may execute as a proxy user.

Author:

Is it? It looks like getCurrentUser returns the subject from the current AccessControlContext, which is set by the doAs when --proxy-user is set.

Regardless, the renewer specified here actually has no effect, since we aren't renewing yet. Once I add renewal, I'll need to revisit this to make sure it's consistent with the renewal we're going to do in MesosSecurity. I'm only setting this now to avoid an Exception that gets thrown in the hadoop library if the master principal is not configured. See the JIRA in the above code comment for details.

Contributor:

The renewer for delegation tokens should be the user that created them (if the service is not doing it for the user, like in the YARN case); only the logged in user can create tokens, so this has to be the current user (not the proxy user, which happens later).

The tokens should be created in the proxy user's name (so user = "proxy" renewer = "real user"), when you care to support that.

> Once I add renewal

So do you mean that currently this only works until the delegation tokens need renewal (which is 1 day by default - a lot shorter than the max life time)?

Author:

> So do you mean that currently this only works until the delegation tokens need renewal (which is 1 day by default - a lot shorter than the max life time)?

Yes
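
For context, here is a minimal sketch (not part of this PR; names are illustrative) of how token acquisition under impersonation is typically done with the Hadoop UGI API, with the proxy user as the token owner and the real logged-in user as the renewer:

```scala
import java.security.PrivilegedExceptionAction

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.security.{Credentials, UserGroupInformation}

// Hypothetical helper: obtain HDFS delegation tokens on behalf of a proxy user.
// The tokens are owned by the proxy user, while the renewer is the real
// (Kerberos-authenticated) user logged in via keytab.
def obtainTokensAsProxy(proxyUser: String, hadoopConf: Configuration): Credentials = {
  val realUser = UserGroupInformation.getCurrentUser
  val proxyUgi = UserGroupInformation.createProxyUser(proxyUser, realUser)
  val creds = new Credentials()
  proxyUgi.doAs(new PrivilegedExceptionAction[Unit] {
    override def run(): Unit = {
      FileSystem.get(hadoopConf).addDelegationTokens(realUser.getShortUserName, creds)
    }
  })
  creds
}
```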


logDebug(s"Setting ${key} to ${shortUserName}.")
sysProps.put(key, shortUserName)
}

// In yarn-cluster mode, use yarn.Client as a wrapper around the user class
if (isYarnCluster) {
childMainClass = "org.apache.spark.deploy.yarn.Client"
@@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.deploy.yarn.security
package org.apache.spark.deploy.security

import java.util.ServiceLoader

@@ -41,15 +41,17 @@ import org.apache.spark.util.Utils
* For example, Hive's credential provider [[HiveCredentialProvider]] can be enabled/disabled by
* the configuration spark.yarn.security.credentials.hive.enabled.
*/
private[yarn] final class ConfigurableCredentialManager(
private[spark] class ConfigurableCredentialManager(
sparkConf: SparkConf, hadoopConf: Configuration) extends Logging {
private val deprecatedProviderEnabledConfig = "spark.yarn.security.tokens.%s.enabled"
Contributor:

Shall we also rename the configurations? Currently they are "spark.yarn.security.*".

Contributor:

No, these are deprecated configs which need to keep the old names for compatibility.

private val providerEnabledConfig = "spark.yarn.security.credentials.%s.enabled"
Contributor:

Now these configs probably should be updated to the new namespace, with code added for backwards compatibility...

Author:

Done
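
As a rough illustration of the rename-with-fallback approach (the key names are illustrative, not necessarily what the PR ends up using):

```scala
// Hypothetical sketch: prefer a new, scheduler-neutral key but fall back to the
// deprecated spark.yarn.* key so existing configurations keep working.
private def isServiceEnabled(sparkConf: SparkConf, service: String): Boolean = {
  val key = s"spark.security.credentials.$service.enabled"
  val deprecatedKey = s"spark.yarn.security.credentials.$service.enabled"
  sparkConf.getOption(key)
    .orElse(sparkConf.getOption(deprecatedKey))
    .map(_.toBoolean)
    .getOrElse(true)
}
```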


// Maintain all the registered credential providers
private val credentialProviders = {
val providers = ServiceLoader.load(classOf[ServiceCredentialProvider],
Utils.getContextOrSparkClassLoader).asScala
private val credentialProviders = getCredentialProviders()
logDebug(s"Using the following credential providers: ${credentialProviders.keys.mkString(", ")}.")

private def getCredentialProviders(): Map[String, ServiceCredentialProvider] = {
val providers = loadCredentialProviders

// Filter out credentials in which spark.yarn.security.credentials.{service}.enabled is false.
providers.filter { p =>
@@ -64,15 +66,22 @@ private[yarn] final class ConfigurableCredentialManager(
}.map { p => (p.serviceName, p) }.toMap
}

/**
protected def loadCredentialProviders: List[ServiceCredentialProvider] = {
ServiceLoader.load(classOf[ServiceCredentialProvider], Utils.getContextOrSparkClassLoader)
.asScala.toList
}

/**
* Get credential provider for the specified service.
*/
def getServiceCredentialProvider(service: String): Option[ServiceCredentialProvider] = {
credentialProviders.get(service)
}

/**
* Obtain credentials from all the registered providers.
* Writes delegation tokens to creds. Delegation tokens are fetched from all registered
* providers.
*
* @return nearest time of next renewal, Long.MaxValue if all the credentials aren't renewable,
* otherwise the nearest renewal time of any credentials will be returned.
*/
@@ -87,21 +96,4 @@ private[yarn] final class ConfigurableCredentialManager(
}
}.foldLeft(Long.MaxValue)(math.min)
}

/**
* Create an [[AMCredentialRenewer]] instance, caller should be responsible to stop this
* instance when it is not used. AM will use it to renew credentials periodically.
*/
def credentialRenewer(): AMCredentialRenewer = {
new AMCredentialRenewer(sparkConf, hadoopConf, this)
}

/**
* Create an [[CredentialUpdater]] instance, caller should be resposible to stop this intance
* when it is not used. Executors and driver (client mode) will use it to update credentials.
* periodically.
*/
def credentialUpdater(): CredentialUpdater = {
new CredentialUpdater(sparkConf, hadoopConf, this)
}
}
@@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.deploy.yarn.security
package org.apache.spark.deploy.security

import scala.reflect.runtime.universe
import scala.util.control.NonFatal
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.deploy.security

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.security.{Credentials, UserGroupInformation}
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging

private[deploy] class HadoopFSCredentialProvider
extends ServiceCredentialProvider with Logging {
// Token renewal interval. This value is set on the first call; None means that no token
// renewer was specified or that no token can be renewed, so the renewal interval cannot
// be determined.
private var tokenRenewalInterval: Option[Long] = null

override val serviceName: String = "hadoopfs"

override def obtainCredentials(
hadoopConf: Configuration,
sparkConf: SparkConf,
creds: Credentials): Option[Long] = {
// NameNode to access, used to get tokens from different FileSystems
val tmpCreds = new Credentials()
val tokenRenewer = getTokenRenewer(hadoopConf)
hadoopFSsToAccess(hadoopConf, sparkConf).foreach { dst =>
val dstFs = dst.getFileSystem(hadoopConf)
logInfo("getting token for: " + dst)
dstFs.addDelegationTokens(tokenRenewer, tmpCreds)
}

// Get the token renewal interval if it is not set. It will only be called once.
if (tokenRenewalInterval == null) {
tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf)
}

// Get the time of next renewal.
val nextRenewalDate = tokenRenewalInterval.flatMap { interval =>
val nextRenewalDates = tmpCreds.getAllTokens.asScala
.filter(_.decodeIdentifier().isInstanceOf[AbstractDelegationTokenIdentifier])
.map { t =>
val identifier = t.decodeIdentifier().asInstanceOf[AbstractDelegationTokenIdentifier]
identifier.getIssueDate + interval
}
if (nextRenewalDates.isEmpty) None else Some(nextRenewalDates.min)
}

creds.addAll(tmpCreds)
nextRenewalDate
}

protected def getTokenRenewalInterval(
hadoopConf: Configuration,
sparkConf: SparkConf): Option[Long] = None

protected def getTokenRenewer(hadoopConf: Configuration): String = {
UserGroupInformation.getCurrentUser.getShortUserName
}

protected def hadoopFSsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = {
Set(FileSystem.get(hadoopConf).getHomeDirectory)
}
}
@@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.deploy.yarn.security
package org.apache.spark.deploy.security

import java.lang.reflect.UndeclaredThrowableException
import java.security.PrivilegedExceptionAction
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.deploy.security

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.{Credentials, UserGroupInformation}
Contributor:

Will this move not break all existing ServiceCredentialProviders? (Since the service discovery via META-INF/services will be affected now.)
@jerryshao and @vanzin can comment more since they wrote/reviewed this IIRC.

Contributor:

The code in the "yarn" module is sort of considered "private", so anyone extending those classes is walking on thin ice... the internal services files need to be moved / updated, but I don't think we need to worry about external implementations.

This also means we need to be more careful here when making classes / traits public. Maybe add @InterfaceStability.Evolving annotations if they haven't been added, or something.

(Haven't looked at the rest of the code yet, kinda waiting for the PR builder to be happy first.)

Author:

I've moved all the ServiceCredentialProvider subclasses to core, and I've updated the references in yarn's META-INF.

Contributor:

@vanzin Since this is documented in running-on-yarn.md, I don't think it can be considered an internal implementation detail anymore; it is part of our exposed contract.

@mgummelt I did see that all Spark implementations have been moved - but since this is an exposed contract, there can be (and are) external implementations which rely on it.
If we are breaking API compatibility, we should be very explicit about it: I wanted to make sure it is called out and evaluated accordingly (and not treated as an internal implementation detail within yarn/core).

Contributor:

Hmm... that's unfortunate, not just because of this change, but because yarn/ is not included in mima checks, so if the class changed in incompatible ways we wouldn't notice.

I really doubt anyone has custom implementations of that interface at this point. And making it a proper API in core is better going forward, IMO. If you really think that it's important to keep backwards compatibility, we could have the old interface present, extending the new one, and some compatibility code to keep both working. But that sounds like a lot of work for something I doubt anyone is currently using...

Contributor (@mridulm), Apr 18, 2017:

@vanzin Would be good to keep backward compatibility since there are users of the interface.
I did not realize yarn was excluded from MIMA - that is unfortunate.

A cursory look at ServiceLoader seemed to indicate it might not be very involved to provide backward compatibility (the yarn interface extends from core, load both interfaces, and use both as the core impl will do? Or did I miss something here?)

Contributor:

For backwards compat you'd have to call ServiceLoader twice, to load implementations of the two interfaces. That should be most of it, really, assuming the interface itself is not changing.

Author:

OK I'll try to unbreak backwards compatibility.

Author:

OK, I:

  • Re-added deploy.yarn.security.ServiceCredentialProvider, extending deploy.security.ServiceCredentialProvider, and deprecated it.
  • Added a new YARNConfigurableCredentialProvider, which extends ConfigurableCredentialProvider, with the added behavior that it also loads providers from deploy.yarn.security.ServiceCredentialProvider.

Author:

I'm not quite sure this is correct though. As it is, if a user has a service listed in the resources entry for both deploy.security.ServiceCredentialProvider and deploy.yarn.security.ServiceCredentialProvider, then they will fetch tokens twice for each service. This should only occur if the user's workflow is to drop a duplicated, but modified version of the deploy.yarn.security.ServiceCredentialProvider resources file into META-INF. If they have a fork of Spark that they merge, or if they have an entry in some other jar for just their customized service, there should be no duplication problem.

Thoughts?
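
A minimal sketch of the load-both-then-de-duplicate idea (hypothetical; it assumes the code lives somewhere both interfaces are visible, e.g. the YARN-side manager, and that de-duplicating by serviceName is acceptable, which would also address the double-fetch concern above):

```scala
import java.util.ServiceLoader

import scala.collection.JavaConverters._

import org.apache.spark.util.Utils

// Hypothetical: load providers registered under both the new core interface and
// the deprecated YARN one, keeping only one provider per service name.
private def loadAllProviders(): Map[String, ServiceCredentialProvider] = {
  val loader = Utils.getContextOrSparkClassLoader
  val newProviders =
    ServiceLoader.load(classOf[ServiceCredentialProvider], loader).asScala.toSeq
  val oldProviders = ServiceLoader
    .load(classOf[org.apache.spark.deploy.yarn.security.ServiceCredentialProvider], loader)
    .asScala.toSeq
  // Entries from the new resource file win over duplicates in the deprecated one.
  (oldProviders ++ newProviders).map(p => p.serviceName -> p).toMap
}
```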


import org.apache.spark.SparkConf

/**
* A credential provider for a service. User must implement this if they need to access a
* secure service from Spark.
*/
trait ServiceCredentialProvider {
Contributor:

No annotation means "Unstable" IIRC, but might as well be explicit here and add a proper InterfaceStability annotation.

Author:

How does InterfaceStability relate to Scala-language-level access control (public, private, etc.)? Every public interface must maintain backwards compatibility within a major version, right? So does it make sense to have a public trait that's unstable?

Contributor:

It's not a Scala thing, it's a Spark API thing. When adding a public API that might change, we annotate it appropriately so that people can know that it's not really stable. Flip side, if you change an API that is tagged with a "Stable" annotation, you'll be yelled at.

It also shows up in docs:
http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.sql.util.QueryExecutionListener

(Basically this is the new version of @DeveloperApi and @Experimental.)

Author:

> When adding a public API that might change

Ah, thanks. I didn't know this was allowed at all.

Author:

Done
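
For reference, a sketch of what the annotation mentioned above might look like on this trait (the exact stability level is a judgment call; Unstable is shown purely as an example):

```scala
import org.apache.spark.annotation.InterfaceStability

// Hypothetical: mark the public trait so its stability expectations are explicit.
@InterfaceStability.Unstable
trait ServiceCredentialProvider {
  def serviceName: String
  // ... rest of the trait as defined below ...
}
```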


/**
* Name of the service for which credentials are provided. This name should be unique; Spark
* uses it internally to differentiate credential providers.
*/
def serviceName: String

/**
* Decides whether credentials are required for this service. By default this is based on
* whether Hadoop security is enabled.
*/
def credentialsRequired(hadoopConf: Configuration): Boolean = {
UserGroupInformation.isSecurityEnabled
}

/**
* Obtain credentials for this service and get the time of the next renewal.
* @param hadoopConf Configuration of current Hadoop Compatible system.
* @param sparkConf Spark configuration.
* @param creds Credentials to add tokens and security keys to.
* @return If this Credential is renewable and can be renewed, return the time of the next
* renewal, otherwise None should be returned.
*/
def obtainCredentials(
hadoopConf: Configuration,
sparkConf: SparkConf,
creds: Credentials): Option[Long]
}
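
To make the contract concrete, a hypothetical user-side implementation might look like the following (class name and service name are made up; it would be registered through a META-INF/services entry for this trait):

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.Credentials

import org.apache.spark.SparkConf

// Hypothetical provider for an external secure service.
class MyServiceCredentialProvider extends ServiceCredentialProvider {

  override def serviceName: String = "myservice"

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    // Fetch tokens for the external service and add them to `creds` here.
    // Returning None indicates the tokens are not renewable.
    None
  }
}
```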
@@ -21,11 +21,14 @@ import java.net.URL
import java.nio.ByteBuffer
import java.util.Locale
import java.util.concurrent.atomic.AtomicBoolean
import javax.xml.bind.DatatypeConverter

import scala.collection.mutable
import scala.util.{Failure, Success}
import scala.util.control.NonFatal

import org.apache.hadoop.security.UserGroupInformation

import org.apache.spark._
import org.apache.spark.TaskState.TaskState
import org.apache.spark.deploy.SparkHadoopUtil
@@ -174,6 +177,24 @@ private[spark] class CoarseGrainedExecutorBackend(

private[spark] object CoarseGrainedExecutorBackend extends Logging {

private def addMesosDelegationTokens(driverConf: SparkConf) {
val value = driverConf.get("spark.mesos.kerberos.userCredentials")
Contributor:

You really should be using the config constant you create here.

But a bigger issue is that this value will be written to event logs and shown in the UI. Instead of using the config for this, would it be too hard to add this as a field in SparkAppConfig (see CoarseGrainedClusterMessage.scala)?

Author:

Yea, I'll go ahead and change this to an RPC.

Author:

Done
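
For illustration, the SparkAppConfig-based alternative discussed above could look roughly like this (the added field name is hypothetical, and the existing fields are assumed from the current message definition):

```scala
// Hypothetical sketch of the SparkAppConfig message with an extra field that carries
// serialized delegation tokens to executors during registration, instead of shipping
// them through a SparkConf entry that ends up in event logs and the UI.
case class SparkAppConfig(
    sparkProperties: Seq[(String, String)],
    ioEncryptionKey: Option[Array[Byte]],
    hadoopDelegationCreds: Option[Array[Byte]]) // added field (hypothetical)
```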

val tokens = DatatypeConverter.parseBase64Binary(value)

logDebug(s"Found delegation tokens of ${tokens.length} bytes.")

// Use tokens for HDFS login.
val hadoopConf = SparkHadoopUtil.get.newConfiguration(driverConf)
hadoopConf.set("hadoop.security.authentication", "Token")
UserGroupInformation.setConfiguration(hadoopConf)

// Decode tokens and add them to the current user's credentials.
val creds = UserGroupInformation.getCurrentUser.getCredentials
val tokensBuf = new java.io.ByteArrayInputStream(tokens)
creds.readTokenStorageStream(new java.io.DataInputStream(tokensBuf))
UserGroupInformation.getCurrentUser.addCredentials(creds)
}
Contributor:

This is relying on an immutable initial credential populated in the driver conf.
How is the renewal scenario expected to be handled here?

Author:

In my pending renewal PR, I've implemented it to use a new UpdateDelegationToken RPC call. This is just used for the initial transmission. I guess it makes sense to consolidate.

Contributor:

That would be great, thanks - I am trying to get a sense of how renewal works in this case.
There is some ongoing work on having different ways to update credentials, and I was hoping this (from what I understand, not using HDFS but a direct RPC) would be another way to do it - allowing for multiple common implementations which can be leveraged across all schedulers depending on requirements.

Contributor:

To elaborate: one potential use of it is to do token acquisition and token distribution without needing to provide a principal/keytab to the Spark application (other than keeping track of the last credential update to account for AM failures).
This is WIP though, but if the Mesos approach is different from YARN's, that would be a great way to iterate on the latter aspect (token distribution) and ensure it is extensible enough for future requirements/implementations.

Author:

Yes, I believe @jerryshao is working on a standard method of delegating tokens. We can plug that in once it exists.


private def run(
driverUrl: String,
executorId: String,
@@ -220,6 +241,10 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging {
SparkHadoopUtil.get.startCredentialUpdater(driverConf)
}

if (driverConf.contains("spark.mesos.kerberos.userCredentials")) {
addMesosDelegationTokens(driverConf)
}

val env = SparkEnv.createExecutorEnv(
driverConf, executorId, hostname, port, cores, cfg.ioEncryptionKey, isLocal = false)

@@ -0,0 +1 @@
org.apache.spark.deploy.security.TestCredentialProvider
@@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.deploy.yarn.security
package org.apache.spark.deploy.security

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.Text
@@ -24,7 +24,6 @@ import org.apache.hadoop.security.token.Token
import org.scalatest.{BeforeAndAfter, Matchers}

import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.deploy.yarn.config._

class ConfigurableCredentialManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfter {
private var credentialManager: ConfigurableCredentialManager = null