Skip to content

Commit 96265a1

Browse files
committed
Update slice method comment and some corresponding docs.
1 parent e143d7a commit 96265a1

File tree

2 files changed

+9
-6
lines changed

2 files changed

+9
-6
lines changed

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -520,10 +520,12 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
520520

521521
/** Distribute a local Scala collection to form an RDD.
522522
*
523-
* @note Parallelize acts lazily. If `seq` is a mutable collection and is
524-
* altered after the call to parallelize and before the first action on the
525-
* RDD, the resultant RDD will reflect the modified collection. Pass a copy of
526-
* the argument to avoid this.
523+
* @note Parallelize acts lazily. If `seq` is a mutable collection and is altered after the call
524+
* to parallelize and before the first action on the RDD, the resultant RDD will reflect the
525+
* modified collection. Pass a copy of the argument to avoid this.
526+
*
527+
* @note When splitting a Range, each sub-range is exclusive. However, the last slice of an
528+
* inclusive Range is inclusive.
527529
*/
528530
def parallelize[T: ClassTag](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = {
529531
new ParallelCollectionRDD[T](this, seq, numSlices, Map[Int, Seq[String]]())

core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ private object ParallelCollectionRDD {
111111
/**
112112
* Slice a collection into numSlices sub-collections. One extra thing we do here is to treat Range
113113
* collections specially, encoding the slices as other Ranges to minimize memory cost. This makes
114-
* it efficient to run Spark over RDDs representing large sets of numbers.
114+
* it efficient to run Spark over RDDs representing large sets of numbers. If the collection
115+
* is an inclusive Range, an inclusive range is used for the last slice.
115116
*/
116117
def slice[T: ClassTag](seq: Seq[T], numSlices: Int): Seq[Seq[T]] = {
117118
if (numSlices < 1) {
@@ -129,7 +130,7 @@ private object ParallelCollectionRDD {
129130
seq match {
130131
case r: Range => {
131132
positions(r.length, numSlices).zipWithIndex.map({ case ((start, end), index) =>
132-
// If the range is inclusive, include the last element in the last slice
133+
// If the range is inclusive, use inclusive range for the last slice
133134
if (r.isInclusive && index == numSlices - 1) {
134135
new Range.Inclusive(r.start + start * r.step, r.end, r.step)
135136
}

0 commit comments

Comments
 (0)