-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-9026] Refactor SimpleFutureAction.onComplete to not launch separate thread for every callback #7385
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
[SPARK-9026] Refactor SimpleFutureAction.onComplete to not launch separate thread for every callback #7385
Changes from 6 commits
df20ed5
55c41d3
1deed38
d779af8
1e2db7f
1346313
e08623a
b504384
12ddad6
dae8805
c6fdc21
7b22514
1a19268
c9ef8d4
692b3a4
17edbcd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,9 @@ | |
|
|
||
| package org.apache.spark.scheduler | ||
|
|
||
| import scala.concurrent.{Future, Promise} | ||
| import scala.util.Success | ||
|
|
||
| /** | ||
| * An object that waits for a DAGScheduler job to complete. As tasks finish, it passes their | ||
| * results to the given handler function. | ||
|
|
@@ -28,12 +31,18 @@ private[spark] class JobWaiter[T]( | |
| resultHandler: (Int, T) => Unit) | ||
| extends JobListener { | ||
|
|
||
| private val promise = Promise[Unit] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Stepping back. If we are using promise anyway, why do we need a separate variable called "jobFinished"? The promise is sufficient in keeping the state of whether the job has finished or not. The rest of the code needs to use |
||
|
|
||
| private var finishedTasks = 0 | ||
|
|
||
| // Is the job as a whole finished (succeeded or failed)? | ||
| @volatile | ||
| private var _jobFinished = totalTasks == 0 | ||
|
|
||
| if (_jobFinished) { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @zsxwing, this |
||
| promise.complete(Success(Unit)) | ||
| } | ||
|
|
||
| def jobFinished: Boolean = _jobFinished | ||
|
|
||
| // If the job is finished, this will be its result. In the case of 0 task jobs (e.g. zero | ||
|
|
@@ -58,13 +67,15 @@ private[spark] class JobWaiter[T]( | |
| if (finishedTasks == totalTasks) { | ||
| _jobFinished = true | ||
| jobResult = JobSucceeded | ||
| promise.trySuccess() | ||
| this.notifyAll() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line can be removed. Right? |
||
| } | ||
| } | ||
|
|
||
| override def jobFailed(exception: Exception): Unit = synchronized { | ||
| _jobFinished = true | ||
| jobResult = JobFailed(exception) | ||
| promise.tryFailure(exception) | ||
| this.notifyAll() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line can be removed too. |
||
| } | ||
|
|
||
|
|
@@ -74,4 +85,10 @@ private[spark] class JobWaiter[T]( | |
| } | ||
| return jobResult | ||
| } | ||
|
|
||
| /** | ||
| * Return a Future to monitor the job success or failure event. You can use this method to | ||
| * avoid blocking your thread. | ||
| */ | ||
| def toFuture: Future[Unit] = promise.future | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,11 +17,12 @@ | |
|
|
||
| package org.apache.spark | ||
|
|
||
| import scala.concurrent.Await | ||
| import scala.concurrent.{ExecutionContext, Await} | ||
| import scala.concurrent.duration.Duration | ||
|
|
||
| import org.scalatest.{BeforeAndAfter, Matchers} | ||
|
|
||
| import org.apache.spark.util.ThreadUtils | ||
|
|
||
| class FutureActionSuite | ||
| extends SparkFunSuite | ||
|
|
@@ -49,4 +50,20 @@ class FutureActionSuite | |
| job.jobIds.size should be (2) | ||
| } | ||
|
|
||
| test("simple async action callbacks should not tie up execution context threads (SPARK-9026)") { | ||
| val rdd = sc.parallelize(1 to 10, 2).map(_ => Thread.sleep(1000 * 1000)) | ||
| val pool = ThreadUtils.newDaemonCachedThreadPool("SimpleFutureActionTest") | ||
| val executionContext = ExecutionContext.fromExecutorService(pool) | ||
| val job = rdd.countAsync() | ||
| try { | ||
| for (_ <- 1 to 10) { | ||
| job.onComplete(_ => ())(executionContext) | ||
| assert(pool.getLargestPoolSize < 10) | ||
|
||
| } | ||
| } finally { | ||
| job.cancel() | ||
| executionContext.shutdownNow() | ||
| } | ||
| } | ||
|
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This part seems like a bad hack to use `awaitResult` to get the result. Rather, there should be a `JobWaiter.jobResult` (make it public) that returns `Option[JobResult]`, and use that.