-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-10515] When killing executor, the pending replacement executors should not be lost #8668
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
a03023d
773a11e
558cd04
5882a10
cf56d21
2722425
7e0c199
d738641
71a59a3
0041fde
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -66,6 +66,9 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp | |
| // Executors we have requested the cluster manager to kill that have not died yet | ||
| private val executorsPendingToRemove = new HashSet[String] | ||
|
|
||
| // Number of executors requested from the cluster manager that have not replaced yet | ||
| private var numReplacingExecutors = 0 | ||
|
|
||
| // A map to store hostname with its possible task number running on it | ||
| protected var hostToLocalTaskCount: Map[String, Int] = Map.empty | ||
|
|
||
|
|
@@ -147,6 +150,10 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp | |
| numPendingExecutors -= 1 | ||
| logDebug(s"Decremented number of pending executors ($numPendingExecutors left)") | ||
| } | ||
| if (numReplacingExecutors > 0) { | ||
| numReplacingExecutors -= 1 | ||
| logDebug(s"Decremented number of replaceing executors ($numReplacingExecutors left)") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: "number of executors being replaced" |
||
| } | ||
| } | ||
| // Note: some tests expect the reply to come after we put the executor in the map | ||
| context.reply(RegisteredExecutor) | ||
|
|
@@ -431,7 +438,10 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp | |
| // take into account executors that are pending to be added or removed. | ||
| if (!replace) { | ||
| doRequestTotalExecutors( | ||
| numExistingExecutors + numPendingExecutors - executorsPendingToRemove.size) | ||
| numExistingExecutors + numPendingExecutors - executorsPendingToRemove.size | ||
| + numReplacingExecutors) | ||
| } else { | ||
| numReplacingExecutors += knownExecutors.size | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we really need another variable? Can't we just do [inline code snippet missing from this extract]? This makes sense on a high level too; if we replace an executor we expect to get one back, so it should be pending in the meantime. |
||
| } | ||
|
|
||
| doKillExecutors(executorsToKill) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: "have not been replaced"