[SPARK-20434][YARN][CORE] Move Hadoop delegation token code from yarn to core #17723
core/pom.xml

@@ -357,6 +357,34 @@
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-crypto</artifactId>
    </dependency>
    <!--
      Testing Hive reflection needs hive on the test classpath only.
      It doesn't need the spark hive modules, so the -Phive flag is not checked.
    -->
    <dependency>
      <groupId>${hive.group}</groupId>
      <artifactId>hive-exec</artifactId>
Author:
I still don't know how to place these in the

Contributor:
So I tried a few things, and the one that got me furthest was just having this:

And nix the others. Adding the others in test scope caused some weird error in sbt, even with all dependencies (we have the dependencies you had problems with cached locally). My comment was going to be to add that, then rewrite the code to use the metastore API instead. All it seems to be doing is making sure the reflection-based code is not completely broken. That is something already, though. So I have two suggestions, in order of preference:

I kinda like the first because it's always good to avoid reflection, and this is a particularly ugly use of it.

Author:
Thanks for looking into it. Do you know why reflection was used in the first place? Why not just add the Hive dependencies to compile scope? I'm thinking that's what we should do now, and drop reflection. So I'm agreeing with your first bullet point, but proposing that we add the hive deps to compile scope.

Contributor:
Because technically Hive is an optional dependency for Spark, and moving it to compile scope would break that. (Whether that should change or not is a separate discussion, but probably better not to have it as part of this change.)

Author:
Alright, I added hive-exec to provided scope and removed the reflection.
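For readers following along, the change this thread settles on can be sketched as follows. This is a hedged illustration, not the PR's exact code: it assumes Hive's org.apache.hadoop.hive.ql.metadata.Hive API (Hive.get, getDelegationToken, closeCurrent), which becomes directly callable once hive-exec is on the provided classpath.

```scala
// Hedged sketch: replacing reflection with direct calls into hive-exec.
// Assumes Hive 1.2.x-era signatures; names may differ in other versions.
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.ql.metadata.Hive

// Before (yarn module style): look everything up reflectively so the class
// compiles without Hive on the classpath.
//   val hiveClass = Class.forName("org.apache.hadoop.hive.ql.metadata.Hive")
//   val hive = hiveClass.getMethod("get", classOf[HiveConf]).invoke(null, conf)
//   val token = hiveClass.getMethod("getDelegationToken", classOf[String], classOf[String])
//     .invoke(hive, user, renewer).asInstanceOf[String]

// After: with hive-exec in provided scope, call the metastore API directly.
def fetchHiveDelegationToken(conf: HiveConf, user: String, renewer: String): String = {
  val hive = Hive.get(conf) // thread-local Hive client backed by the metastore
  try {
    hive.getDelegationToken(user, renewer) // token in its string-encoded form
  } finally {
    Hive.closeCurrent() // release the thread-local client
  }
}
```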
core/pom.xml (continued)

    </dependency>
    <dependency>
      <groupId>${hive.group}</groupId>
      <artifactId>hive-metastore</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.thrift</groupId>
      <artifactId>libthrift</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.thrift</groupId>
      <artifactId>libfb303</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-yarn-api</artifactId>
    </dependency>
  </dependencies>
  <build>
    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
core/src/main/resources/META-INF/services/org.apache.spark.deploy.security.ServiceCredentialProvider (new file)

@@ -0,0 +1,3 @@
org.apache.spark.deploy.security.HadoopFSCredentialProvider
org.apache.spark.deploy.security.HBaseCredentialProvider
org.apache.spark.deploy.security.HiveCredentialProvider
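As a hedged sketch of the plugin mechanism this file enables: a third-party provider implements ServiceCredentialProvider and is discovered by the ServiceLoader. The class below and its config key are hypothetical; the three members are the ones the diff's own providers implement. Because HadoopAccessManager is private[spark] in this diff, the example is placed in an org.apache.spark package.

```scala
package org.apache.spark.deploy.security

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.Credentials

// Hypothetical provider, used only to illustrate the ServiceLoader contract.
class MyServiceCredentialProvider extends ServiceCredentialProvider {

  // Toggled by spark.security.credentials.myservice.enabled
  override val serviceName: String = "myservice"

  // Only fetch tokens when the cluster actually uses Kerberos (illustrative check).
  override def credentialsRequired(hadoopConf: Configuration): Boolean =
    hadoopConf.get("hadoop.security.authentication") == "kerberos"

  // Add this service's delegation token to creds and report the next renewal
  // time in epoch millis, or None if the token cannot be renewed.
  override def obtainCredentials(
      hadoopConf: Configuration,
      hadoopAccessManager: HadoopAccessManager,
      creds: Credentials): Option[Long] = {
    // ... fetch a token from the external service and call creds.addToken(...) ...
    None
  }
}
```

To register it, the plugin jar would ship its own META-INF/services/org.apache.spark.deploy.security.ServiceCredentialProvider file containing the single line org.apache.spark.deploy.security.MyServiceCredentialProvider.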
core/src/main/scala/org/apache/spark/deploy/security/ConfigurableCredentialManager.scala (new file)

@@ -0,0 +1,133 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.deploy.security

import java.util.ServiceLoader

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.{Credentials, UserGroupInformation}

import org.apache.spark.SparkConf
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils

/**
 * A ConfigurableCredentialManager manages all the registered credential providers and offers
 * APIs for other modules to obtain credentials as well as the renewal time. By default,
 * [[HadoopFSCredentialProvider]], [[HiveCredentialProvider]] and [[HBaseCredentialProvider]]
 * are loaded unless explicitly disabled. Any plugged-in credential provider that wants to be
 * managed by ConfigurableCredentialManager needs to implement the [[ServiceCredentialProvider]]
 * interface and be declared under resources/META-INF/services so that it can be loaded by the
 * ServiceLoader.
 *
 * Each credential provider is also controlled by
 * spark.security.credentials.{service}.enabled; it will not be loaded if that is set to false.
 * For example, Hive's credential provider [[HiveCredentialProvider]] can be enabled/disabled by
 * the configuration spark.security.credentials.hive.enabled.
 */
private[spark] class ConfigurableCredentialManager(
    sparkConf: SparkConf,
    hadoopConf: Configuration,
    hadoopAccessManager: HadoopAccessManager)
  extends Logging {

  private val deprecatedProviderEnabledConfigs = List(
    "spark.yarn.security.tokens.%s.enabled",
    "spark.yarn.security.credentials.%s.enabled")
  private val providerEnabledConfig = "spark.security.credentials.%s.enabled"

  // Maintain all the registered credential providers
  private val credentialProviders = getCredentialProviders
  logDebug(s"Using the following credential providers: ${credentialProviders.keys.mkString(", ")}.")

  def this(sparkConf: SparkConf, hadoopConf: Configuration) {
    this(sparkConf, hadoopConf, new DefaultHadoopAccessManager(hadoopConf))
  }

  def this(sparkConf: SparkConf) {
    this(sparkConf, SparkHadoopUtil.get.newConfiguration(sparkConf))
  }

  private def getCredentialProviders(): Map[String, ServiceCredentialProvider] = {
    val providers = loadCredentialProviders

    // Filter out providers for which spark.security.credentials.{service}.enabled is false.
    providers
      .filter(p => isServiceEnabled(p.serviceName))
      .map(p => (p.serviceName, p))
      .toMap
  }

  protected def isServiceEnabled(serviceName: String): Boolean = {
    val key = providerEnabledConfig.format(serviceName)

    deprecatedProviderEnabledConfigs.foreach { pattern =>
      val deprecatedKey = pattern.format(serviceName)
      if (sparkConf.contains(deprecatedKey)) {
        logWarning(s"${deprecatedKey} is deprecated, using ${key} instead")
      }
    }

    val isEnabledDeprecated = deprecatedProviderEnabledConfigs.forall { pattern =>
Member:
Nit:

Author:
Why? It would be a pure, 0-ary function, which is better represented as a val.

Member:
This will be used only when
ConfigurableCredentialManager.scala (continued)

      sparkConf
        .getOption(pattern.format(serviceName))
        .map(_.toBoolean)
        .getOrElse(true)
    }

    sparkConf
      .getOption(key)
      .map(_.toBoolean)
      .getOrElse(isEnabledDeprecated)
  }

  private def loadCredentialProviders: List[ServiceCredentialProvider] = {
    ServiceLoader.load(classOf[ServiceCredentialProvider], Utils.getContextOrSparkClassLoader)
      .asScala.toList
  }

  /**
   * Get the credential provider for the specified service.
   */
  def getServiceCredentialProvider(service: String): Option[ServiceCredentialProvider] = {
    credentialProviders.get(service)
  }

  /**
   * Writes delegation tokens to creds. Delegation tokens are fetched from all registered
   * providers.
   *
   * @return the nearest renewal time of any of the credentials, or Long.MaxValue if none
   *         of the credentials are renewable
   */
  def obtainCredentials(
      hadoopConf: Configuration,
      creds: Credentials): Long = {
    credentialProviders.values.flatMap { provider =>
      if (provider.credentialsRequired(hadoopConf)) {
        provider.obtainCredentials(hadoopConf, hadoopAccessManager, creds)
      } else {
        logDebug(s"Service ${provider.serviceName} does not require a token." +
          s" Check your configuration to see if security is disabled or not.")
        None
      }
    }.foldLeft(Long.MaxValue)(math.min)
  }
}
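A hedged usage sketch of the class above, from the perspective of a scheduler backend inside Spark (the class is private[spark], so an external caller could not use it directly). Only the constructors and obtainCredentials come from this diff; the renewal scheduling is a hypothetical caller's concern.

```scala
import org.apache.hadoop.security.{Credentials, UserGroupInformation}

import org.apache.spark.SparkConf
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.deploy.security.ConfigurableCredentialManager

val sparkConf = new SparkConf()
val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf)

// The auxiliary constructor falls back to DefaultHadoopAccessManager.
val credentialManager = new ConfigurableCredentialManager(sparkConf, hadoopConf)

// Fetch tokens from every enabled provider into one Credentials object.
val creds = new Credentials()
val nextRenewal: Long = credentialManager.obtainCredentials(hadoopConf, creds)

// Make the tokens visible to this process; a real scheduler would also
// serialize creds and ship them to executors.
UserGroupInformation.getCurrentUser.addCredentials(creds)

if (nextRenewal != Long.MaxValue) {
  // At least one token is renewable: schedule a refresh before nextRenewal
  // (epoch millis); how to schedule it is scheduler-specific.
}
```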
core/src/main/scala/org/apache/spark/deploy/security/DefaultHadoopAccessManager.scala (new file)

@@ -0,0 +1,38 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.deploy.security

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import org.apache.hadoop.security.UserGroupInformation

class DefaultHadoopAccessManager(hadoopConf: Configuration) extends HadoopAccessManager {

  def getTokenRenewer: String = {
    UserGroupInformation.getCurrentUser.getShortUserName
  }

  def hadoopFSsToAccess: Set[Path] = {
    Set(FileSystem.get(hadoopConf).getHomeDirectory)
  }

  def getTokenRenewalInterval: Option[Long] = {
    None
  }
}
core/src/main/scala/org/apache/spark/deploy/security/HadoopAccessManager.scala (new file)

@@ -0,0 +1,38 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.deploy.security

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

/**
 * Methods in [[HadoopAccessManager]] return scheduler-specific information related to how Hadoop
 * delegation tokens should be fetched.
 */
private[spark] trait HadoopAccessManager {

  /** The user allowed to renew delegation tokens */
  def getTokenRenewer: String

  /** The renewal interval, or [[None]] if the token shouldn't be renewed */
  def getTokenRenewalInterval: Option[Long]

  /** The set of hadoop file systems to fetch delegation tokens for */
  def hadoopFSsToAccess: Set[Path]
}
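To contrast with DefaultHadoopAccessManager above, here is a hedged sketch of a scheduler-specific implementation. The config keys and defaults are illustrative, not real Spark configuration; the class sits in an org.apache.spark package because the trait is private[spark] in this diff.

```scala
package org.apache.spark.deploy.security

import org.apache.hadoop.fs.Path

import org.apache.spark.SparkConf

// Hypothetical YARN-style access manager: a configured renewer principal,
// a renewal interval read from config, and extra filesystems beyond the
// home directory.
class ExampleSchedulerAccessManager(sparkConf: SparkConf) extends HadoopAccessManager {

  // e.g. the resource manager principal that renews tokens on our behalf.
  def getTokenRenewer: String =
    sparkConf.get("example.scheduler.token.renewer", "yarn")

  // Renewal interval in millis, if this deployment renews tokens at all.
  def getTokenRenewalInterval: Option[Long] =
    sparkConf.getOption("example.scheduler.token.renewal.interval").map(_.toLong)

  // The staging directory plus any extra filesystems the job will touch.
  def hadoopFSsToAccess: Set[Path] = {
    val extra = sparkConf.get("example.scheduler.extra.filesystems", "")
      .split(",").filter(_.nonEmpty).map(new Path(_)).toSet
    extra + new Path(sparkConf.get("example.scheduler.staging.dir", "/tmp/staging"))
  }
}
```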
core/src/main/scala/org/apache/spark/deploy/security/HadoopFSCredentialProvider.scala (new file)

@@ -0,0 +1,70 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.deploy.security

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.Credentials
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier

import org.apache.spark.internal.Logging
Contributor:
nit: too many blank lines

Author:
Fixed (I was in PEP8 mode)
HadoopFSCredentialProvider.scala (continued)

private[deploy] class HadoopFSCredentialProvider
    extends ServiceCredentialProvider with Logging {
  // Token renewal interval. This value is set on the first call; None means that no token
  // renewer was specified or that no token can be renewed, so the renewal interval cannot
  // be determined.
  private var tokenRenewalInterval: Option[Long] = null

  override val serviceName: String = "hadoopfs"

  override def obtainCredentials(
      hadoopConf: Configuration,
      hadoopAccessManager: HadoopAccessManager,
      creds: Credentials): Option[Long] = {

    // NameNode to access, used to get tokens from different FileSystems
    val tmpCreds = new Credentials()
    val tokenRenewer = hadoopAccessManager.getTokenRenewer
    hadoopAccessManager.hadoopFSsToAccess.foreach { dst =>
      val dstFs = dst.getFileSystem(hadoopConf)
      logInfo("getting token for: " + dst)
      dstFs.addDelegationTokens(tokenRenewer, tmpCreds)
    }

    // Get the token renewal interval if it is not set. It will only be fetched once.
    if (tokenRenewalInterval == null) {
      tokenRenewalInterval = hadoopAccessManager.getTokenRenewalInterval
    }

    // Get the time of the next renewal.
    val nextRenewalDate = tokenRenewalInterval.flatMap { interval =>
      val nextRenewalDates = tmpCreds.getAllTokens.asScala
        .filter(_.decodeIdentifier().isInstanceOf[AbstractDelegationTokenIdentifier])
        .map { t =>
          val identifier = t.decodeIdentifier().asInstanceOf[AbstractDelegationTokenIdentifier]
          identifier.getIssueDate + interval
        }
      if (nextRenewalDates.isEmpty) None else Some(nextRenewalDates.min)
    }

    creds.addAll(tmpCreds)
    nextRenewalDate
  }
}
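To make the renewal arithmetic above concrete, a small self-contained sketch (the sample values are hypothetical): each renewable token's next renewal is its issue date plus the renewal interval, the provider reports the earliest one, and the manager then folds the providers' answers with math.min.

```scala
// Hypothetical sample values, in epoch milliseconds.
val tokenRenewalInterval: Option[Long] = Some(24L * 60 * 60 * 1000) // a 24h interval
val issueDates = Seq(1500000000000L, 1500000060000L)                // one per token

// What this provider returns: the earliest (issue date + interval), if renewable.
val nextRenewalDate: Option[Long] = tokenRenewalInterval.flatMap { interval =>
  val dates = issueDates.map(_ + interval)
  if (dates.isEmpty) None else Some(dates.min)
}

// What ConfigurableCredentialManager.obtainCredentials then does across
// providers: None drops out of the fold, so an all-None result stays at
// Long.MaxValue ("nothing to renew").
val perProvider: Seq[Option[Long]] = Seq(nextRenewalDate, None)
val nearest: Long = perProvider.flatten.foldLeft(Long.MaxValue)(math.min)
```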
Is it possible to avoid adding these dependencies to core? Does it make sense to do the test in org.apache.spark.sql.hive? cc @vanzin @mridulm

The code is in core, so the test should be in core.

Can we work around it? It's weird to test code of the sql module in the hive module, but it's weirder to let the sql module depend on the hive module. If we can't work around it, I'd like to move the test to the hive module.

I'm not sure what you mean. This is the core module, not the sql module.

Oh sorry, I made a mistake, but it's much weirder to let the core module depend on the hive module...

We went back and forth on this in the main thread, which is admittedly long and hard to follow at this point. Here's the same concern laid out by mridulm: #17723 (comment)

We ultimately decided that it would be acceptable to add these dependencies, so long as we don't expose them in the public Spark interfaces. Here's a comment to that effect: #17723 (comment)

The reason they're required is that this PR moves the Hadoop delegation token providers from the yarn module into the core module, so that other resource managers, such as Mesos, can also fetch delegation tokens to access secure Hadoop services. One of those delegation token providers is Hive: https://github.com/apache/spark/pull/17723/files#diff-8be1059872a069d1bb4c5fc3ca394968

Also, to clarify, this is not adding a dependency on the Spark hive module. It's adding a dependency on Apache Hive.