-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-13001] [CORE] [MESOS] Prevent getting offers when reached max cores #10924
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
9b314e0
5ef4879
31b2aba
ad2f014
55732fa
0ccd71c
112f136
5b55ae0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -109,10 +109,14 @@ private[spark] class CoarseMesosSchedulerBackend( | |
| private val slaveOfferConstraints = | ||
| parseConstraintString(sc.conf.get("spark.mesos.constraints", "")) | ||
|
|
||
| // reject offers with mismatched constraints in seconds | ||
| // Duration, in seconds, for which offers with mismatched constraints are rejected | ||
| private val rejectOfferDurationForUnmetConstraints = | ||
| getRejectOfferDurationForUnmetConstraints(sc) | ||
|
|
||
| // Reject offers when we have reached the maximum number of cores for this framework | ||
| private val rejectOfferDurationForReachedMaxCores = | ||
| getRejectOfferDurationForReachedMaxCores(sc) | ||
|
|
||
| // A client for talking to the external shuffle service | ||
| private val mesosExternalShuffleClient: Option[MesosExternalShuffleClient] = { | ||
| if (shuffleServiceEnabled) { | ||
|
|
@@ -279,18 +283,28 @@ private[spark] class CoarseMesosSchedulerBackend( | |
| } | ||
|
|
||
| private def declineUnmatchedOffers(d: SchedulerDriver, offers: Buffer[Offer]): Unit = { | ||
| for (offer <- offers) { | ||
| val id = offer.getId.getValue | ||
| val offerAttributes = toAttributeMap(offer.getAttributesList) | ||
| val mem = getResource(offer.getResourcesList, "mem") | ||
| val cpus = getResource(offer.getResourcesList, "cpus") | ||
| val filters = Filters.newBuilder() | ||
| .setRefuseSeconds(rejectOfferDurationForUnmetConstraints).build() | ||
| offers.foreach { offer => | ||
| declineOffer(d, offer, Some("unmet constraints"), | ||
| Some(rejectOfferDurationForUnmetConstraints)) | ||
| } | ||
| } | ||
|
|
||
| private def declineOffer(d: SchedulerDriver, offer: Offer, reason: Option[String] = None, | ||
| refuseSeconds: Option[Long] = None): Unit = { | ||
|
|
||
| logDebug(s"Declining offer: $id with attributes: $offerAttributes mem: $mem cpu: $cpus" | ||
| + s" for $rejectOfferDurationForUnmetConstraints seconds") | ||
| val id = offer.getId.getValue | ||
| val offerAttributes = toAttributeMap(offer.getAttributesList) | ||
| val mem = getResource(offer.getResourcesList, "mem") | ||
| val cpus = getResource(offer.getResourcesList, "cpus") | ||
|
|
||
| d.declineOffer(offer.getId, filters) | ||
| logDebug(s"Declining offer: $id with attributes: $offerAttributes mem: $mem" | ||
| + s" cpu: $cpus for $refuseSeconds seconds" + reason.fold("")(r => s" (reason: $r)")) | ||
|
||
|
|
||
| refuseSeconds match { | ||
| case Some(seconds) => | ||
| val filters = Filters.newBuilder().setRefuseSeconds(seconds).build() | ||
| d.declineOffer(offer.getId, filters) | ||
| case _ => d.declineOffer(offer.getId) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -326,11 +340,12 @@ private[spark] class CoarseMesosSchedulerBackend( | |
| d.launchTasks( | ||
| Collections.singleton(offer.getId), | ||
| offerTasks.asJava) | ||
| } else { // decline | ||
| logDebug(s"Declining offer: $id with attributes: $offerAttributes " + | ||
| s"mem: $offerMem cpu: $offerCpus") | ||
|
|
||
| d.declineOffer(offer.getId) | ||
| } else if (totalCoresAcquired >= maxCores) { | ||
| // Reject an offer for a configurable amount of time to avoid starving other frameworks | ||
| declineOffer(d, offer, Some("reached spark.cores.max"), | ||
| Some(rejectOfferDurationForReachedMaxCores)) | ||
| } else { | ||
| declineOffer(d, offer) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -147,6 +147,19 @@ class CoarseMesosSchedulerBackendSuite extends SparkFunSuite | |
| verifyDeclinedOffer(driver, createOfferId("o1"), true) | ||
| } | ||
|
|
||
| test("mesos declines offers with a filter when reached spark.cores.max") { | ||
| val maxCores = 3 | ||
| setBackend(Map("spark.cores.max" -> maxCores.toString)) | ||
|
|
||
| val executorMemory = backend.executorMemory(sc) | ||
| offerResources(List( | ||
| (executorMemory, maxCores + 1), | ||
| (executorMemory, maxCores + 1))) | ||
|
|
||
| verifyTaskLaunched("o1") | ||
| verifyDeclinedOffer(driver, createOfferId("o2"), true) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This doesn't test the new config var. This would have passed before the addition of this feature.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would have failed because the declined offer wouldn't have been passed a filter. It would have passed with There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, then can you change the test description to "mesos declines offers with a filter" |
||
| } | ||
|
|
||
| test("mesos assigns tasks round-robin on offers") { | ||
| val executorCores = 4 | ||
| val maxCores = executorCores * 2 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -406,6 +406,20 @@ See the [configuration page](configuration.html) for information on Spark config | |
| If unset it will point to Spark's internal web UI. | ||
| </td> | ||
| </tr> | ||
| <tr> | ||
| <td><code>spark.mesos.rejectOfferDurationForUnmetConstraints</code></td> | ||
| <td><code>120s</code></td> | ||
| <td> | ||
| Set the amount of time for which offers are rejected when constraints are unmet. See <code>spark.mesos.constraints</code>. | ||
| </td> | ||
| </tr> | ||
| <tr> | ||
| <td><code>spark.mesos.rejectOfferDurationForReachedMaxCores</code></td> | ||
|
||
| <td><code>120s</code></td> | ||
| <td> | ||
| Set the amount of time for which offers are rejected when the app has already acquired <code>spark.cores.max</code> cores. | ||
|
||
| </td> | ||
| </tr> | ||
| </table> | ||
|
|
||
| # Troubleshooting and Debugging | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
style: