Skip to content

Commit c588470

Browse files
committed
Merge pull request #7 from markhamstra/master-csd
SPY-287 Merging Apache 0.8.2 changes
2 parents 12280b5 + a57cd14 commit c588470

26 files changed

Lines changed: 120 additions & 44 deletions

File tree

assembly/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
<parent>
2222
<groupId>org.apache.spark</groupId>
2323
<artifactId>spark-parent</artifactId>
24-
<version>0.8.1-csd-3-SNAPSHOT</version>
24+
<version>0.8.2-candidate-csd-1-SNAPSHOT</version>
2525
<relativePath>../pom.xml</relativePath>
2626
</parent>
2727

bagel/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
<parent>
2222
<groupId>org.apache.spark</groupId>
2323
<artifactId>spark-parent</artifactId>
24-
<version>0.8.1-csd-3-SNAPSHOT</version>
24+
<version>0.8.2-candidate-csd-1-SNAPSHOT</version>
2525
<relativePath>../pom.xml</relativePath>
2626
</parent>
2727

core/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
<parent>
2222
<groupId>org.apache.spark</groupId>
2323
<artifactId>spark-parent</artifactId>
24-
<version>0.8.1-csd-3-SNAPSHOT</version>
24+
<version>0.8.2-candidate-csd-1-SNAPSHOT</version>
2525
<relativePath>../pom.xml</relativePath>
2626
</parent>
2727

core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,17 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
246246
new java.util.ArrayList(arr)
247247
}
248248

249+
/**
250+
* Return an array of lists, one per requested partition ID, where each list contains all of the elements in that partition of this RDD.
251+
*/
252+
def collectPartitions(partitionIds: Array[Int]): Array[JList[T]] = {
253+
// This is useful for implementing `take` from other language frontends
254+
// like Python where the data is serialized.
255+
import scala.collection.JavaConversions._
256+
val res = context.runJob(rdd, (it: Iterator[T]) => it.toArray, partitionIds, true)
257+
res.map(x => new java.util.ArrayList(x.toSeq)).toArray
258+
}
259+
249260
/**
250261
* Reduces the elements of this RDD using the specified commutative and associative binary operator.
251262
*/

core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import java.util.concurrent.atomic.AtomicLong
2222

2323
import org.apache.spark._
2424

25-
abstract class Broadcast[T](private[spark] val id: Long) extends Serializable {
25+
abstract class Broadcast[T](val id: Long) extends Serializable {
2626
def value: T
2727

2828
// We cannot have an abstract readObject here due to some weird issues with

core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
409409
// There may be one or more refs to dead workers on this same node (w/ different IDs),
410410
// remove them.
411411
workers.filter { w =>
412-
(w.host == host && w.port == port) && (w.state == WorkerState.DEAD)
412+
(w.host == worker.host && w.port == worker.port) && (w.state == WorkerState.DEAD)
413413
}.foreach { w =>
414414
workers -= w
415415
}

core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class ZooKeeperPersistenceEngine(serialization: Serialization)
7777
}
7878

7979
def deserializeFromFile[T <: Serializable](filename: String)(implicit m: Manifest[T]): T = {
80-
val fileData = zk.getData("/spark/master_status/" + filename)
80+
val fileData = zk.getData(WORKING_DIR + "/" + filename)
8181
val clazz = m.erasure.asInstanceOf[Class[T]]
8282
val serializer = serialization.serializerFor(clazz)
8383
serializer.fromBinary(fileData).asInstanceOf[T]

core/src/main/scala/org/apache/spark/executor/Executor.scala

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -142,11 +142,6 @@ private[spark] class Executor(
142142
val tr = runningTasks.get(taskId)
143143
if (tr != null) {
144144
tr.kill()
145-
// We remove the task also in the finally block in TaskRunner.run.
146-
// The reason we need to remove it here is because killTask might be called before the task
147-
// is even launched, and never reaching that finally block. ConcurrentHashMap's remove is
148-
// idempotent.
149-
runningTasks.remove(taskId)
150145
}
151146
}
152147

@@ -168,6 +163,8 @@ private[spark] class Executor(
168163
class TaskRunner(execBackend: ExecutorBackend, taskId: Long, serializedTask: ByteBuffer)
169164
extends Runnable {
170165

166+
object TaskKilledException extends Exception
167+
171168
@volatile private var killed = false
172169
@volatile private var task: Task[Any] = _
173170

@@ -201,9 +198,11 @@ private[spark] class Executor(
201198
// If this task has been killed before we deserialized it, let's quit now. Otherwise,
202199
// continue executing the task.
203200
if (killed) {
204-
logInfo("Executor killed task " + taskId)
205-
execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(TaskKilled))
206-
return
201+
// Throw an exception rather than returning, because returning within a try{} block
202+
// causes a NonLocalReturnControl exception to be thrown. The NonLocalReturnControl
203+
// exception will be caught by the catch block, leading to an incorrect ExceptionFailure
204+
// for the task.
205+
throw TaskKilledException
207206
}
208207

209208
attemptedTask = Some(task)
@@ -217,9 +216,7 @@ private[spark] class Executor(
217216

218217
// If the task has been killed, let's fail it.
219218
if (task.killed) {
220-
logInfo("Executor killed task " + taskId)
221-
execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(TaskKilled))
222-
return
219+
throw TaskKilledException
223220
}
224221

225222
for (m <- task.metrics) {
@@ -257,6 +254,11 @@ private[spark] class Executor(
257254
execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))
258255
}
259256

257+
case TaskKilledException => {
258+
logInfo("Executor killed task " + taskId)
259+
execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(TaskKilled))
260+
}
261+
260262
case t: Throwable => {
261263
val serviceTime = (System.currentTimeMillis() - taskStart).toInt
262264
val metrics = attemptedTask.flatMap(t => t.metrics)

core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,8 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
287287
}
288288
}
289289
case None =>
290-
logInfo("Ignoring update from TID " + tid + " because its task set is gone")
290+
logInfo("Ignoring update with state %s from TID %s because its task set is gone"
291+
.format(state, tid))
291292
}
292293
} catch {
293294
case e: Exception => logError("Exception in statusUpdate", e)

core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ import java.nio.ByteBuffer
2121
import scala.collection.mutable.ArrayBuffer
2222
import scala.collection.mutable.HashMap
2323

24-
import org.apache.spark.{ExceptionFailure, Logging, SparkEnv, SparkException, Success, TaskState}
24+
import org.apache.spark.{ExceptionFailure, Logging, SparkEnv, SparkException, Success,
25+
TaskEndReason, TaskResultLost, TaskState}
2526
import org.apache.spark.TaskState.TaskState
2627
import org.apache.spark.scheduler.{DirectTaskResult, IndirectTaskResult, Pool, Schedulable, Task,
2728
TaskDescription, TaskInfo, TaskLocality, TaskResult, TaskSet, TaskSetManager}
@@ -144,7 +145,18 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas
144145
val result = ser.deserialize[TaskResult[_]](serializedData, getClass.getClassLoader) match {
145146
case directResult: DirectTaskResult[_] => directResult
146147
case IndirectTaskResult(blockId) => {
147-
throw new SparkException("Expect only DirectTaskResults when using LocalScheduler")
148+
logDebug("Fetching indirect task result for TID %s".format(tid))
149+
val serializedTaskResult = env.blockManager.getRemoteBytes(blockId)
150+
if (!serializedTaskResult.isDefined) {
151+
/* We won't be able to get the task result if the block manager had to flush the
152+
* result. */
153+
taskFailed(tid, state, serializedData)
154+
return
155+
}
156+
val deserializedResult = ser.deserialize[DirectTaskResult[_]](
157+
serializedTaskResult.get)
158+
env.blockManager.master.removeBlock(blockId)
159+
deserializedResult
148160
}
149161
}
150162
result.metrics.resultSize = serializedData.limit()
@@ -164,18 +176,28 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas
164176
val task = taskSet.tasks(index)
165177
info.markFailed()
166178
decreaseRunningTasks(1)
167-
val reason: ExceptionFailure = ser.deserialize[ExceptionFailure](
168-
serializedData, getClass.getClassLoader)
169-
sched.dagScheduler.taskEnded(task, reason, null, null, info, reason.metrics.getOrElse(null))
179+
var failureReason = "unknown"
180+
ser.deserialize[TaskEndReason](serializedData, getClass.getClassLoader) match {
181+
case ef: ExceptionFailure =>
182+
failureReason = "Exception failure: %s".format(ef.description)
183+
val locs = ef.stackTrace.map(loc => "\tat %s".format(loc.toString))
184+
logInfo("Task loss due to %s\n%s\n%s".format(
185+
ef.className, ef.description, locs.mkString("\n")))
186+
sched.dagScheduler.taskEnded(task, ef, null, null, info, ef.metrics.getOrElse(null))
187+
188+
case TaskResultLost =>
189+
failureReason = "Lost result for TID %s".format(tid)
190+
logWarning(failureReason)
191+
sched.dagScheduler.taskEnded(task, TaskResultLost, null, null, info, null)
192+
193+
case _ => {}
194+
}
170195
if (!finished(index)) {
171196
copiesRunning(index) -= 1
172197
numFailures(index) += 1
173-
val locs = reason.stackTrace.map(loc => "\tat %s".format(loc.toString))
174-
logInfo("Loss was due to %s\n%s\n%s".format(
175-
reason.className, reason.description, locs.mkString("\n")))
176198
if (numFailures(index) > MAX_TASK_FAILURES) {
177-
val errorMessage = "Task %s:%d failed more than %d times; aborting job %s".format(
178-
taskSet.id, index, MAX_TASK_FAILURES, reason.description)
199+
val errorMessage = ("Task %s:%d failed more than %d times; aborting job" +
200+
"(most recent failure: %s").format(taskSet.id, index, MAX_TASK_FAILURES, failureReason)
179201
decreaseRunningTasks(runningTasks)
180202
sched.dagScheduler.taskSetFailed(taskSet, errorMessage)
181203
// need to delete failed Taskset from schedule queue

0 commit comments

Comments
 (0)