apache
diff --git a/‎R/pkg/R/context.R‎
Lines changed: 2 additions & 0 deletions b/‎R/pkg/R/context.R‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎R/pkg/inst/tests/testthat/test_basic.R‎
Lines changed: 4 additions & 4 deletions b/‎R/pkg/inst/tests/testthat/test_basic.R‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎bin/docker-image-tool.sh‎
Lines changed: 8 additions & 2 deletions b/‎bin/docker-image-tool.sh‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala‎
Lines changed: 7 additions & 1 deletion b/‎core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎core/src/main/scala/org/apache/spark/SparkContext.scala‎
Lines changed: 17 additions & 1 deletion b/‎core/src/main/scala/org/apache/spark/SparkContext.scala‎
Lines changed: 17 additions & 1 deletion
diff --git a/‎core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala‎
Lines changed: 6 additions & 0 deletions b/‎core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/internal/config/package.scala‎
Lines changed: 5 additions & 1 deletion b/‎core/src/main/scala/org/apache/spark/internal/config/package.scala‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala‎
Lines changed: 5 additions & 2 deletions b/‎core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala‎
Lines changed: 10 additions & 7 deletions b/‎core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala‎
Lines changed: 10 additions & 7 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/storage/BlockManager.scala‎
Lines changed: 9 additions & 2 deletions b/‎core/src/main/scala/org/apache/spark/storage/BlockManager.scala‎
Lines changed: 9 additions & 2 deletions
@@ -305,6 +305,8 @@ setCheckpointDirSC <- function(sc, dirName) {
 #' Currently directories are only supported for Hadoop-supported filesystems.
 #' Refer Hadoop-supported filesystems at \url{https://wiki.apache.org/hadoop/HCFS}.
 #'
+#' Note: A path can be added only once. Subsequent additions of the same path are ignored.
+#'
 #' @rdname spark.addFile
 #' @param path The path of the file to be added
 #' @param recursive Whether to add files recursively from the path. Default is FALSE.
 
@@ -18,9 +18,9 @@
 context("basic tests for CRAN")
 
 test_that("create DataFrame from list or data.frame", {
-  tryCatch( checkJavaVersion(),
+  tryCatch(checkJavaVersion(),
             error = function(e) { skip("error on Java check") },
-            warning = function(e) { skip("warning on Java check") } )
+            warning = function(e) { skip("warning on Java check") })
 
   sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE,
                  sparkConfig = sparkRTestConfig)
@@ -54,9 +54,9 @@ test_that("create DataFrame from list or data.frame", {
 })
 
 test_that("spark.glm and predict", {
-  tryCatch( checkJavaVersion(),
+  tryCatch(checkJavaVersion(),
             error = function(e) { skip("error on Java check") },
-            warning = function(e) { skip("warning on Java check") } )
+            warning = function(e) { skip("warning on Java check") })
 
   sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE,
                  sparkConfig = sparkRTestConfig)
 
@@ -49,6 +49,7 @@ function build {
     # Set image build arguments accordingly if this is a source repo and not a distribution archive.
     IMG_PATH=resource-managers/kubernetes/docker/src/main/dockerfiles
     BUILD_ARGS=(
+      ${BUILD_PARAMS}
       --build-arg
       img_path=$IMG_PATH
       --build-arg
@@ -57,13 +58,14 @@ function build {
   else
     # Not passed as an argument to docker, but used to validate the Spark directory.
     IMG_PATH="kubernetes/dockerfiles"
-    BUILD_ARGS=()
+    BUILD_ARGS=(${BUILD_PARAMS})
   fi
 
   if [ ! -d "$IMG_PATH" ]; then
     error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
   fi
   local BINDING_BUILD_ARGS=(
+    ${BUILD_PARAMS}
     --build-arg
     base_img=$(image_ref spark)
   )
@@ -101,6 +103,8 @@ Options:
   -t tag      Tag to apply to the built image, or to identify the image to be pushed.
   -m          Use minikube's Docker daemon.
   -n          Build docker image with --no-cache
+  -b arg      Build arg to build or push the image. For multiple build args, this option needs to
+              be used separately for each build arg.
 
 Using minikube when building images will do so directly into minikube's Docker daemon.
 There is no need to push the images into minikube in that case, they'll be automatically
@@ -130,7 +134,8 @@ TAG=
 BASEDOCKERFILE=
 PYDOCKERFILE=
 NOCACHEARG=
-while getopts f:mr:t:n option
+BUILD_PARAMS=
+while getopts f:p:mr:t:n:b: option
 do
  case "${option}"
  in
@@ -139,6 +144,7 @@ do
  r) REPO=${OPTARG};;
  t) TAG=${OPTARG};;
  n) NOCACHEARG="--no-cache";;
+ b) BUILD_PARAMS=${BUILD_PARAMS}" --build-arg "${OPTARG};;
  m)
    if ! which minikube 1>/dev/null; then
      error "Cannot find minikube."
 
@@ -488,9 +488,15 @@ private[spark] class ExecutorAllocationManager(
     newExecutorTotal = numExistingExecutors
     if (testing || executorsRemoved.nonEmpty) {
       executorsRemoved.foreach { removedExecutorId =>
+        // An executor holding cached blocks uses cachedExecutorIdleTimeoutS as its timeout
+        val idleTimeout = if (blockManagerMaster.hasCachedBlocks(removedExecutorId)) {
+          cachedExecutorIdleTimeoutS
+        } else {
+          executorIdleTimeoutS
+        }
         newExecutorTotal -= 1
         logInfo(s"Removing executor $removedExecutorId because it has been idle for " +
-          s"$executorIdleTimeoutS seconds (new desired total will be $newExecutorTotal)")
+          s"$idleTimeout seconds (new desired total will be $newExecutorTotal)")
         executorsPendingToRemove.add(removedExecutorId)
       }
       executorsRemoved
 
@@ -1496,6 +1496,8 @@ class SparkContext(config: SparkConf) extends Logging {
    * @param path can be either a local file, a file in HDFS (or other Hadoop-supported
    * filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs,
    * use `SparkFiles.get(fileName)` to find its download location.
+   *
+   * @note A path can be added only once. Subsequent additions of the same path are ignored.
    */
   def addFile(path: String): Unit = {
     addFile(path, false)
@@ -1516,11 +1518,17 @@ class SparkContext(config: SparkConf) extends Logging {
    * use `SparkFiles.get(fileName)` to find its download location.
    * @param recursive if true, a directory can be given in `path`. Currently directories are
    * only supported for Hadoop-supported filesystems.
+   *
+   * @note A path can be added only once. Subsequent additions of the same path are ignored.
    */
   def addFile(path: String, recursive: Boolean): Unit = {
     val uri = new Path(path).toUri
     val schemeCorrectedPath = uri.getScheme match {
-      case null | "local" => new File(path).getCanonicalFile.toURI.toString
+      case null => new File(path).getCanonicalFile.toURI.toString
+      case "local" =>
+        logWarning("File with 'local' scheme is not supported to add to file server, since " +
+          "it is already available on every node.")
+        return
       case _ => path
     }
 
@@ -1555,6 +1563,9 @@ class SparkContext(config: SparkConf) extends Logging {
       Utils.fetchFile(uri.toString, new File(SparkFiles.getRootDirectory()), conf,
         env.securityManager, hadoopConfiguration, timestamp, useCache = false)
       postEnvironmentUpdate()
+    } else {
+      logWarning(s"The path $path has been added already. Overwriting of added paths " +
+       "is not supported in the current version.")
     }
   }
 
@@ -1803,6 +1814,8 @@ class SparkContext(config: SparkConf) extends Logging {
    *
    * @param path can be either a local file, a file in HDFS (or other Hadoop-supported filesystems),
    * an HTTP, HTTPS or FTP URI, or local:/path for a file on every worker node.
+   *
+   * @note A path can be added only once. Subsequent additions of the same path are ignored.
    */
   def addJar(path: String) {
     def addJarFile(file: File): String = {
@@ -1849,6 +1862,9 @@ class SparkContext(config: SparkConf) extends Logging {
         if (addedJars.putIfAbsent(key, timestamp).isEmpty) {
           logInfo(s"Added JAR $path at $key with timestamp $timestamp")
           postEnvironmentUpdate()
+        } else {
+          logWarning(s"The jar $path has been added already. Overwriting of added jars " +
+            "is not supported in the current version.")
         }
       }
     }
 
@@ -668,6 +668,8 @@ class JavaSparkContext(val sc: SparkContext)
    * The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported
    * filesystems), or an HTTP, HTTPS or FTP URI.  To access the file in Spark jobs,
    * use `SparkFiles.get(fileName)` to find its download location.
+   *
+   * @note A path can be added only once. Subsequent additions of the same path are ignored.
    */
   def addFile(path: String) {
     sc.addFile(path)
@@ -681,6 +683,8 @@ class JavaSparkContext(val sc: SparkContext)
    *
    * A directory can be given if the recursive option is set to true. Currently directories are only
    * supported for Hadoop-supported filesystems.
+   *
+   * @note A path can be added only once. Subsequent additions of the same path are ignored.
    */
   def addFile(path: String, recursive: Boolean): Unit = {
     sc.addFile(path, recursive)
@@ -690,6 +694,8 @@ class JavaSparkContext(val sc: SparkContext)
    * Adds a JAR dependency for all tasks to be executed on this SparkContext in the future.
    * The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported
    * filesystems), or an HTTP, HTTPS or FTP URI.
+   *
+   * @note A path can be added only once. Subsequent additions of the same path are ignored.
    */
   def addJar(path: String) {
     sc.addJar(path)
 
@@ -432,7 +432,11 @@ package object config {
         "external shuffle service, this feature can only be worked when external shuffle" +
         "service is newer than Spark 2.2.")
       .bytesConf(ByteUnit.BYTE)
-      .createWithDefault(Long.MaxValue)
+      // fetch-to-mem is guaranteed to fail if the message is bigger than 2 GB, so we might
+      // as well use fetch-to-disk in that case.  The message includes some metadata in addition
+      // to the block data itself (in particular UploadBlock has a lot of metadata), so we leave
+      // extra room.
+      .createWithDefault(Int.MaxValue - 512)
 
   private[spark] val TASK_METRICS_TRACK_UPDATED_BLOCK_STATUSES =
     ConfigBuilder("spark.taskMetrics.trackUpdatedBlockStatuses")
 
@@ -697,9 +697,12 @@ private[spark] class TaskSchedulerImpl(
    * do not also submit those same tasks.  That also means that a task completion from an earlier
    * attempt can lead to the entire stage getting marked as successful.
    */
-  private[scheduler] def markPartitionCompletedInAllTaskSets(stageId: Int, partitionId: Int) = {
+  private[scheduler] def markPartitionCompletedInAllTaskSets(
+      stageId: Int,
+      partitionId: Int,
+      taskInfo: TaskInfo) = {
     taskSetsByStageIdAndAttempt.getOrElse(stageId, Map()).values.foreach { tsm =>
-      tsm.markPartitionCompleted(partitionId)
+      tsm.markPartitionCompleted(partitionId, taskInfo)
     }
   }
 
 
@@ -84,10 +84,10 @@ private[spark] class TaskSetManager(
   val successful = new Array[Boolean](numTasks)
   private val numFailures = new Array[Int](numTasks)
 
-  // Set the coresponding index of Boolean var when the task killed by other attempt tasks,
-  // this happened while we set the `spark.speculation` to true. The task killed by others
+  // Add the tid of a task to this HashSet when the task is killed by other attempt tasks.
+  // This happens when `spark.speculation` is set to true. The task killed by others
   // should not resubmit while executor lost.
-  private val killedByOtherAttempt: Array[Boolean] = new Array[Boolean](numTasks)
+  private val killedByOtherAttempt = new HashSet[Long]
 
   val taskAttempts = Array.fill[List[TaskInfo]](numTasks)(Nil)
   private[scheduler] var tasksSuccessful = 0
@@ -735,7 +735,7 @@ private[spark] class TaskSetManager(
       logInfo(s"Killing attempt ${attemptInfo.attemptNumber} for task ${attemptInfo.id} " +
         s"in stage ${taskSet.id} (TID ${attemptInfo.taskId}) on ${attemptInfo.host} " +
         s"as the attempt ${info.attemptNumber} succeeded on ${info.host}")
-      killedByOtherAttempt(index) = true
+      killedByOtherAttempt += attemptInfo.taskId
       sched.backend.killTask(
         attemptInfo.taskId,
         attemptInfo.executorId,
@@ -758,7 +758,7 @@ private[spark] class TaskSetManager(
     }
     // There may be multiple tasksets for this stage -- we let all of them know that the partition
     // was completed.  This may result in some of the tasksets getting completed.
-    sched.markPartitionCompletedInAllTaskSets(stageId, tasks(index).partitionId)
+    sched.markPartitionCompletedInAllTaskSets(stageId, tasks(index).partitionId, info)
     // This method is called by "TaskSchedulerImpl.handleSuccessfulTask" which holds the
     // "TaskSchedulerImpl" lock until exiting. To avoid the SPARK-7655 issue, we should not
     // "deserialize" the value when holding a lock to avoid blocking other threads. So we call
@@ -769,9 +769,12 @@ private[spark] class TaskSetManager(
     maybeFinishTaskSet()
   }
 
-  private[scheduler] def markPartitionCompleted(partitionId: Int): Unit = {
+  private[scheduler] def markPartitionCompleted(partitionId: Int, taskInfo: TaskInfo): Unit = {
     partitionToIndex.get(partitionId).foreach { index =>
       if (!successful(index)) {
+        if (speculationEnabled && !isZombie) {
+          successfulTaskDurations.insert(taskInfo.duration)
+        }
         tasksSuccessful += 1
         successful(index) = true
         if (tasksSuccessful == numTasks) {
@@ -944,7 +947,7 @@ private[spark] class TaskSetManager(
         && !isZombie) {
       for ((tid, info) <- taskInfos if info.executorId == execId) {
         val index = taskInfos(tid).index
-        if (successful(index) && !killedByOtherAttempt(index)) {
+        if (successful(index) && !killedByOtherAttempt.contains(tid)) {
           successful(index) = false
           copiesRunning(index) -= 1
           tasksSuccessful -= 1
 
@@ -130,6 +130,8 @@ private[spark] class BlockManager(
 
   private[spark] val externalShuffleServiceEnabled =
     conf.getBoolean("spark.shuffle.service.enabled", false)
+  private val chunkSize =
+    conf.getSizeAsBytes("spark.storage.memoryMapLimitForTests", Int.MaxValue.toString).toInt
 
   val diskBlockManager = {
     // Only perform cleanup if an external service is not serving our shuffle files.
@@ -660,6 +662,11 @@ private[spark] class BlockManager(
    * Get block from remote block managers as serialized bytes.
    */
   def getRemoteBytes(blockId: BlockId): Option[ChunkedByteBuffer] = {
+    // TODO if we change this method to return the ManagedBuffer, then getRemoteValues
+    // could just use the inputStream on the temp file, rather than memory-mapping the file.
+    // Until then, replication can cause the process to use too much memory and get killed
+    // by the OS / cluster manager (not a Java OOM, since it's a memory-mapped file) even though
+    // we've read the data to disk.
     logDebug(s"Getting remote block $blockId")
     require(blockId != null, "BlockId is null")
     var runningFailureCount = 0
@@ -690,7 +697,7 @@ private[spark] class BlockManager(
       logDebug(s"Getting remote block $blockId from $loc")
       val data = try {
         blockTransferService.fetchBlockSync(
-          loc.host, loc.port, loc.executorId, blockId.toString, tempFileManager).nioByteBuffer()
+          loc.host, loc.port, loc.executorId, blockId.toString, tempFileManager)
       } catch {
         case NonFatal(e) =>
           runningFailureCount += 1
@@ -724,7 +731,7 @@ private[spark] class BlockManager(
       }
 
       if (data != null) {
-        return Some(new ChunkedByteBuffer(data))
+        return Some(ChunkedByteBuffer.fromManagedBuffer(data, chunkSize))
       }
       logDebug(s"The value of block $blockId is null")
     }
Original file line number	Diff line number	Diff line change
`@@ -697,9 +697,12 @@ private[spark] class TaskSchedulerImpl(`
`697`	`697`	`* do not also submit those same tasks. That also means that a task completion from an earlier`
`698`	`698`	`* attempt can lead to the entire stage getting marked as successful.`
`699`	`699`	`*/`
`700`		`- private[scheduler] def markPartitionCompletedInAllTaskSets(stageId: Int, partitionId: Int) = {`
	`700`	`+ private[scheduler] def markPartitionCompletedInAllTaskSets(`
	`701`	`+ stageId: Int,`
	`702`	`+ partitionId: Int,`
	`703`	`+ taskInfo: TaskInfo) = {`
`701`	`704`	`taskSetsByStageIdAndAttempt.getOrElse(stageId, Map()).values.foreach { tsm =>`
`702`		`- tsm.markPartitionCompleted(partitionId)`
	`705`	`+ tsm.markPartitionCompleted(partitionId, taskInfo)`
`703`	`706`	`}`
`704`	`707`	`}`
`705`	`708`