@@ -541,7 +541,7 @@ abstract class RDD[T: ClassTag](
541541 val sampler = new BernoulliCellSampler [T ](lb, ub)
542542 sampler.setSeed(seed + index)
543543 sampler.sample(partition)
544- }, preservesPartitioning = true )
544+ }, isOrderSensitive = true , preservesPartitioning = true )
545545 }
546546
547547 /**
@@ -854,6 +854,29 @@ abstract class RDD[T: ClassTag](
854854 preservesPartitioning)
855855 }
856856
/**
 * Builds a new RDD by running `f` over every partition of this RDD. `f` receives the index of
 * the partition it is processing together with an iterator over that partition's elements.
 *
 * @param f function applied to each partition as `(partitionIndex, elements)`; it is passed
 *          through `sc.clean` before being handed to the resulting RDD.
 * @param preservesPartitioning whether `f` preserves the partitioner. This should be `false`
 *                              unless this is a pair RDD and `f` does not modify the keys.
 * @param isOrderSensitive whether `f` is order-sensitive, i.e. whether it may return a totally
 *                         different result when the order of its input changes. Stateful
 *                         functions are mostly order-sensitive.
 * @return a `MapPartitionsRDD` whose parent is this RDD.
 */
private[spark] def mapPartitionsWithIndex[U: ClassTag](
    f: (Int, Iterator[T]) => Iterator[U],
    preservesPartitioning: Boolean,
    isOrderSensitive: Boolean): RDD[U] = withScope {
  val userFunc = sc.clean(f)
  // MapPartitionsRDD takes a three-argument function; the TaskContext argument is not needed
  // here, so it is ignored and only the partition index and the element iterator are forwarded.
  val perPartition =
    (_: TaskContext, partitionIndex: Int, elements: Iterator[T]) =>
      userFunc(partitionIndex, elements)
  new MapPartitionsRDD(
    this,
    perPartition,
    preservesPartitioning,
    isOrderSensitive = isOrderSensitive)
}

879+
857880 /**
858881 * Zips this RDD with another one, returning key-value pairs with the first element in each RDD,
859882 * second element in each RDD, etc. Assumes that the two RDDs have the *same number of
0 commit comments