@@ -22,10 +22,9 @@ import java.util.{Locale, Timer, TimerTask}
2222import java .util .concurrent .{ConcurrentHashMap , TimeUnit }
2323import java .util .concurrent .atomic .AtomicLong
2424
25- import scala .collection .Set
25+ import scala .collection .{ Set , mutable }
2626import scala .collection .mutable .{ArrayBuffer , HashMap , HashSet }
2727import scala .util .Random
28-
2928import org .apache .spark ._
3029import org .apache .spark .TaskState .TaskState
3130import org .apache .spark .executor .ExecutorMetrics
@@ -118,7 +117,6 @@ private[spark] class TaskSchedulerImpl(
118117 protected val executorIdToHost = new HashMap [String , String ]
119118
120119 private val abortTimer = new Timer (true )
121-
122120 private val clock = new SystemClock
123121
124122 protected val unschedulableTaskSetToExpiryTime = new HashMap [TaskSetManager , Long ]
@@ -430,14 +428,13 @@ private[spark] class TaskSchedulerImpl(
430428 // executor. If we cannot find one, we abort immediately. Else we kill the idle
431429 // executor and kick off an abortTimer which after waiting will abort the taskSet if
432430 // we were unable to schedule any task from the taskSet.
433- // Note 1: We keep a track of schedulability on a per taskSet basis rather than on a
434- // per task basis.
431+ // Note 1: We keep track of schedulability on a per taskSet basis rather than on a per
432+ // task basis.
435433 // Note 2: The taskSet can still be aborted when there are more than one idle
436- // blacklisted executors and dynamic allocation is on. This is because we rely on the
437- // ExecutorAllocationManager to acquire a new executor based on the pending tasks and
438- // it won't release any blacklisted executors which idle timeout after we kill an
439- // executor to acquire a new one, resulting in the abort timer to expire and abort the
440- // taskSet.
434+ // blacklisted executors and dynamic allocation is on. This can happen when a killed
435+ // idle executor isn't replaced in time by ExecutorAllocationManager as it relies on
436+ // pending tasks and doesn't kill executors on idle timeouts, causing the abort
437+ // timer to expire and abort the taskSet.
441438 executorIdToRunningTaskIds.find(x => ! isExecutorBusy(x._1)) match {
442439 case Some (x) =>
443440 val executorId = x._1
@@ -465,18 +462,16 @@ private[spark] class TaskSchedulerImpl(
465462 }
466463 case _ => // Abort Immediately
467464 logInfo(" Cannot schedule any task because of complete blacklisting. No idle" +
468- s " executors could be found. Aborting $taskSet. " )
465+ s " executors can be found to kill . Aborting $taskSet. " )
469466 taskSet.abortSinceCompletelyBlacklisted(taskIndex.get)
470467 }
471- case _ => // Do nothing.
472- }
473- } else {
474- // If a task was scheduled, we clear the expiry time for the taskSet. The abort timer
475- // checks this entry to decide if we want to abort the taskSet.
476- if (unschedulableTaskSetToExpiryTime.contains(taskSet)) {
477- unschedulableTaskSetToExpiryTime.remove(taskSet)
478- }
468+ case _ => // Do nothing if no tasks completely blacklisted.
479469 }
470+ } else {
471+ // If a task was scheduled, we clear the expiry time for the taskSet. The abort timer
472+ // checks this entry to decide if we want to abort the taskSet.
473+ unschedulableTaskSetToExpiryTime.remove(taskSet)
474+ }
480475
481476 if (launchedAnyTask && taskSet.isBarrier) {
482477 // Check whether the barrier tasks are partially launched.
0 commit comments