@@ -317,8 +317,23 @@ def sample(self, withReplacement, fraction, seed=None):
317317 Return a sampled subset of this RDD (relies on numpy and falls back
318318 on default random generator if numpy is unavailable).
319319
320- >>> sc.parallelize(range(0, 100)).sample(False, 0.1, 2).collect() #doctest: +SKIP
321- [2, 3, 20, 21, 24, 41, 42, 66, 67, 89, 90, 98]
320+ >>> rdd = sc.parallelize(range(0, 100), 4)
321+ >>> wo = rdd.sample(False, 0.1, 2).collect()
322+ >>> wo_dup = rdd.sample(False, 0.1, 2).collect()
323+ >>> set(wo) == set(wo_dup)
324+ True
325+ >>> wr = rdd.sample(True, 0.2, 5).collect()
326+ >>> wr_dup = rdd.sample(True, 0.2, 5).collect()
327+ >>> set(wr) == set(wr_dup)
328+ True
329+ >>> wo_s10 = rdd.sample(False, 0.3, 10).collect()
330+ >>> wo_s20 = rdd.sample(False, 0.3, 20).collect()
331+ >>> set(wo_s10) != set(wo_s20)
332+ True
333+ >>> wr_s11 = rdd.sample(True, 0.4, 11).collect()
334+ >>> wr_s21 = rdd.sample(True, 0.4, 21).collect()
335+ >>> set(wr_s11) != set(wr_s21)
336+ True
322337 """
323338 assert fraction >= 0.0 , "Negative fraction value: %s" % fraction
324339 return self .mapPartitionsWithIndex (RDDSampler (withReplacement , fraction , seed ).func , True )
0 commit comments