-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-5484][GraphX] Periodically do checkpoint in Pregel #15125
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3834981
166fd6d
b119e4a
d183a7c
352dcb2
ad82e45
e786838
a25d00c
38e6238
f2efef6
194dc27
9d7e796
dae94aa
dd6c366
2639eb1
11bc349
9a6fd1f
5015b44
24d4ad6
ec62659
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,7 +19,10 @@ package org.apache.spark.graphx | |
|
|
||
| import scala.reflect.ClassTag | ||
|
|
||
| import org.apache.spark.graphx.util.PeriodicGraphCheckpointer | ||
| import org.apache.spark.internal.Logging | ||
| import org.apache.spark.rdd.RDD | ||
| import org.apache.spark.rdd.util.PeriodicRDDCheckpointer | ||
|
|
||
| /** | ||
| * Implements a Pregel-like bulk-synchronous message-passing API. | ||
|
|
@@ -122,27 +125,39 @@ object Pregel extends Logging { | |
| require(maxIterations > 0, s"Maximum number of iterations must be greater than 0," + | ||
| s" but got ${maxIterations}") | ||
|
|
||
| var g = graph.mapVertices((vid, vdata) => vprog(vid, vdata, initialMsg)).cache() | ||
| val checkpointInterval = graph.vertices.sparkContext.getConf | ||
| .getInt("spark.graphx.pregel.checkpointInterval", -1) | ||
| var g = graph.mapVertices((vid, vdata) => vprog(vid, vdata, initialMsg)) | ||
| val graphCheckpointer = new PeriodicGraphCheckpointer[VD, ED]( | ||
| checkpointInterval, graph.vertices.sparkContext) | ||
| graphCheckpointer.update(g) | ||
|
|
||
| // compute the messages | ||
| var messages = GraphXUtils.mapReduceTriplets(g, sendMsg, mergeMsg) | ||
| val messageCheckpointer = new PeriodicRDDCheckpointer[(VertexId, A)]( | ||
| checkpointInterval, graph.vertices.sparkContext) | ||
| messageCheckpointer.update(messages.asInstanceOf[RDD[(VertexId, A)]]) | ||
| var activeMessages = messages.count() | ||
|
|
||
| // Loop | ||
| var prevG: Graph[VD, ED] = null | ||
| var i = 0 | ||
| while (activeMessages > 0 && i < maxIterations) { | ||
| // Receive the messages and update the vertices. | ||
| prevG = g | ||
| g = g.joinVertices(messages)(vprog).cache() | ||
| g = g.joinVertices(messages)(vprog) | ||
| graphCheckpointer.update(g) | ||
|
|
||
| val oldMessages = messages | ||
| // Send new messages, skipping edges where neither side received a message. We must cache | ||
| // messages so it can be materialized on the next line, allowing us to uncache the previous | ||
| // iteration. | ||
| messages = GraphXUtils.mapReduceTriplets( | ||
| g, sendMsg, mergeMsg, Some((oldMessages, activeDirection))).cache() | ||
| g, sendMsg, mergeMsg, Some((oldMessages, activeDirection))) | ||
| // The call to count() materializes `messages` and the vertices of `g`. This hides oldMessages | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should the comment here be updated? |
||
| // (depended on by the vertices of g) and the vertices of prevG (depended on by oldMessages | ||
| // and the vertices of g). | ||
| messageCheckpointer.update(messages.asInstanceOf[RDD[(VertexId, A)]]) | ||
| activeMessages = messages.count() | ||
|
|
||
| logInfo("Pregel finished iteration " + i) | ||
|
|
@@ -154,7 +169,9 @@ object Pregel extends Logging { | |
| // count the iteration | ||
| i += 1 | ||
| } | ||
| messages.unpersist(blocking = false) | ||
| messageCheckpointer.unpersistDataSet() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I don't understand this change. Why do we replace messages.unpersist(blocking = false) with messageCheckpointer.unpersistDataSet()? Especially because this adds a new public method to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the point is that we use messageCheckpointer.update to do the caching; to pair with that, we can use the checkpointer to unpersist the data as well. Please correct me if I have misunderstood. |
||
| graphCheckpointer.deleteAllCheckpoints() | ||
| messageCheckpointer.deleteAllCheckpoints() | ||
| g | ||
| } // end of apply | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,11 +15,12 @@ | |
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.mllib.impl | ||
| package org.apache.spark.graphx.util | ||
|
|
||
| import org.apache.spark.SparkContext | ||
| import org.apache.spark.graphx.Graph | ||
| import org.apache.spark.storage.StorageLevel | ||
| import org.apache.spark.util.PeriodicCheckpointer | ||
|
|
||
|
|
||
| /** | ||
|
|
@@ -74,9 +75,8 @@ import org.apache.spark.storage.StorageLevel | |
| * @tparam VD Vertex descriptor type | ||
| * @tparam ED Edge descriptor type | ||
| * | ||
| * TODO: Move this out of MLlib? | ||
| */ | ||
| private[mllib] class PeriodicGraphCheckpointer[VD, ED]( | ||
| private[spark] class PeriodicGraphCheckpointer[VD, ED]( | ||
| checkpointInterval: Int, | ||
| sc: SparkContext) | ||
| extends PeriodicCheckpointer[Graph[VD, ED]](checkpointInterval, sc) { | ||
|
|
@@ -87,10 +87,13 @@ private[mllib] class PeriodicGraphCheckpointer[VD, ED]( | |
|
|
||
| override protected def persist(data: Graph[VD, ED]): Unit = { | ||
| if (data.vertices.getStorageLevel == StorageLevel.NONE) { | ||
| data.vertices.persist() | ||
| /* We need to use cache because persist does not honor the default storage level requested | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't persist better? It could potentially support a different storage level later.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to use
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| * when constructing the graph. Only cache does that. | ||
| */ | ||
| data.vertices.cache() | ||
| } | ||
| if (data.edges.getStorageLevel == StorageLevel.NONE) { | ||
| data.edges.persist() | ||
| data.edges.cache() | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know if it's the goal, but this isn't thread-safe?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would agree with you that this is not thread-safe. Is that a concern?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With the limited internal-only use, it should be OK.